紧缩算法(Compression)

行程长度紧缩算法(Run-Length Encoding (RLE))

RLE可能是进行紧缩的最简略办法。假设数据如下:

aaaaabbbcdeeeeeeef...

然后RLE将其编码为:

5a3b1c1d7e1f...

数据有许多重复字节时,RLE能够节约相当多的空间。它在图画上工作杰出。

紧缩代码

extension Data {
    /// Compresses the receiver with a byte-oriented run-length encoding.
    ///
    /// Output format:
    /// - A single (non-repeated) byte below 192 is copied verbatim.
    /// - Anything else becomes two bytes: a marker `191 + n` (192...255) followed by
    ///   the byte value, meaning "this value repeated `n` times" with `n` in 1...64.
    ///   A lone byte >= 192 is written as a run of length 1 so that it can never be
    ///   mistaken for a marker.
    ///
    /// - Returns: The encoded bytes. Empty input yields empty output.
    public func compressRLE() -> Data {
        var output = Data()
        var index = startIndex
        while index < endIndex {
            let byte = self[index]
            var runLength = 0
            // The bounds check comes first so the scan never reads past the last byte.
            while index < endIndex && self[index] == byte && runLength < 64 {
                index = self.index(after: index)
                runLength += 1
            }
            if runLength > 1 || byte >= 192 {
                // Byte run of up to 64 repeats: marker first, then the repeated value.
                let marker: UInt8 = 191 + UInt8(runLength)
                output.append(marker)
                output.append(byte)
            } else {
                // Single byte in 0..<192 is stored as-is.
                output.append(byte)
            }
        }
        return output
    }
}

解紧缩代码

extension Data {
    /// Reverses `compressRLE()`.
    ///
    /// Each byte below 192 is copied to the output unchanged. A byte `m` in 192...255
    /// starts a run: the following byte is the value, emitted `m - 191` times.
    /// A run marker that is the very last byte (no value after it) is ignored.
    ///
    /// - Returns: The decoded bytes. Empty input yields empty output.
    public func decompressRLE() -> Data {
        var output = Data()
        var index = startIndex
        while index < endIndex {
            // Read the next byte. This is either a single value less than 192,
            // or the start of a byte run.
            let byte = self[index]
            index = self.index(after: index)
            if byte < 192 {
                output.append(byte)
            } else if index < endIndex {
                // Read the actual data value.
                let value = self[index]
                index = self.index(after: index)
                // And write it out repeatedly (widen to Int before subtracting).
                output.append(contentsOf: [UInt8](repeating: value, count: Int(byte) - 191))
            }
        }
        return output
    }
}

霍夫曼编码(Huffman Coding)

举例是最直观、最简单的说明方式。

假设有以下文本

so much words wow many compression

计算每个字节出现的频率

space: 5               u: 1
o: 5                   h: 1
s: 4                   d: 1
m: 3                   a: 1
w: 3                   y: 1
c: 2                   p: 1
r: 2                   e: 1
n: 2                   i: 1

能够这样来编码

space: 5    010     u: 1    11001
o: 5    000         h: 1    10001
s: 4    101         d: 1    11010
m: 3    111         a: 1    11011
w: 3    0010        y: 1    01111
c: 2    0011        p: 1    11000
r: 2    1001        e: 1    01110
n: 2    0110        i: 1    10000
101 000 010 111 11001 0011 10001 010 0010 000 1001 11010 101
s   o   _   m   u     c    h     _   w    o   r    d     s
010 0010 000 0010 010 111 11011 0110 01111 010 0011 000 111
_   w    o   w    _   m   a     n    y     _   c    o   m
11000 1001 01110 101 101 10000 000 0110 0
p     r    e     s   s   i     o   n

在小输入上使用霍夫曼编码并不划算。

辅佐代码

NSData理解的最小数据是字节,但咱们处理的是比特,所以咱们需求在两者之间进行转化

/// Accumulates individual bits and appends them to `data` one full byte at a time.
///
/// Bits are packed most-significant-bit first: the first bit written ends up in
/// bit 7 of the first byte.
public class BitWriter {
    /// The bytes produced so far. A partially filled byte is only added by `flush()`.
    public var data = NSMutableData()
    /// The byte currently being assembled.
    var outByte: UInt8 = 0
    /// Number of bits already stored in `outByte` (0...8).
    var outCount = 0

    /// Appends one bit (`true` = 1, `false` = 0) to the stream.
    public func writeBit(bit: Bool) {
        if outCount == 8 {
            // The pending byte is full; store it before starting a new one.
            data.append(&outByte, length: 1)
            outCount = 0
        }
        outByte = (outByte << 1) | (bit ? 1 : 0)
        outCount += 1
    }

    /// Writes the pending byte, if any, padding unused low bits with zeros.
    ///
    /// The internal state is cleared afterwards, so calling `flush()` again without
    /// writing more bits does not append the same byte a second time.
    public func flush() {
        guard outCount > 0 else { return }
        if outCount < 8 {
            // Left-align the bits that were written; the remainder becomes 0 padding.
            outByte <<= UInt8(8 - outCount)
        }
        data.append(&outByte, length: 1)
        outByte = 0
        outCount = 0
    }
}

用于从NSData读取各个位:

/// Reads the bits of an `NSData` buffer one at a time, most significant bit first.
public class BitReader {
    /// Points at the next byte that has not been loaded yet.
    var ptr: UnsafePointer<UInt8>
    /// The byte currently being consumed; its next bit is always in position 7.
    var inByte: UInt8 = 0
    /// Number of bits already consumed from `inByte`; 8 means "load a new byte".
    var inCount = 8
    /// Keeps the buffer alive for as long as `ptr` is in use.
    private let source: NSData
    /// One past the last readable byte.
    private let end: UnsafePointer<UInt8>

    /// Creates a reader positioned at the first bit of `data`.
    public init(data: NSData) {
        let start = data.bytes.assumingMemoryBound(to: UInt8.self)
        source = data
        ptr = start
        end = start + data.length
    }

    /// Returns the next bit (`true` = 1, `false` = 0).
    ///
    /// Once every byte has been consumed, further calls return `false` instead of
    /// reading memory beyond the buffer.
    public func readBit() -> Bool {
        if inCount == 8 {
            guard ptr < end else { return false }
            inByte = ptr.pointee    // load the next byte
            inCount = 0
            ptr += 1
        }
        let bit = inByte & 0x80     // read the next bit
        inByte <<= 1
        inCount += 1
        return bit != 0
    }
}
/// State for Huffman coding: a tree stored as a flat array of nodes.
class Huffman {
    /// Position of a node inside `tree`; -1 means "no node".
    typealias NodeIndex = Int

    /// One tree node. Every link is an index into `tree`, or -1 when absent.
    struct Node {
        /// How many times the byte (or the subtree below this node) occurs.
        var count = 0
        /// This node's own position in `tree`; -1 until it is assigned.
        var index: NodeIndex = -1
        var parent: NodeIndex = -1
        var left: NodeIndex = -1
        var right: NodeIndex = -1
    }

    /// Starts with 256 default nodes, one slot per possible byte value.
    var tree = [Node](repeating: Node(), count: 256)
    /// Index of the root node; -1 while no tree has been built.
    var root: NodeIndex = -1
}

计算输入数据中每个字节出现的频率

/// Tallies how often each byte value occurs in `data`.
///
/// For every byte, the matching slot in `tree` has its `count` incremented and
/// its `index` set to the byte value. Counts are added to whatever the slots
/// already hold.
fileprivate func countByteFrequency(inData data: NSData) {
    let bytes = UnsafeBufferPointer(start: data.bytes.assumingMemoryBound(to: UInt8.self),
                                    count: data.length)
    for byte in bytes {
        let i = Int(byte)
        tree[i].count += 1
        tree[i].index = i
    }
}

导出频率表

/// One entry of the exported frequency table: a byte value and its occurrence count.
struct Freq {
    var byte: UInt8 = 0
    var count = 0
}
/// Builds the frequency table from the first 256 slots of `tree`.
///
/// - Returns: One `Freq` per byte value whose count is greater than zero,
///   in ascending byte order.
func frequencyTable() -> [Freq] {
    return (0..<256)
        .filter { tree[$0].count > 0 }
        .map { Freq(byte: UInt8($0), count: tree[$0].count) }
}

运用优先级行列

/// Builds the Huffman tree from the counts currently stored in `tree`.
///
/// Every node with a non-zero count is put into a priority queue ordered by
/// `count`. The two front nodes are repeatedly removed and joined under a new
/// parent node appended to `tree`, until one node is left; that node becomes `root`.
/// If no node has a non-zero count, `root` is left unchanged.
///
/// NOTE(review): `PriorityQueue` is not defined in this snippet; it is assumed to
/// hand back the element that sorts first under the `sort` closure.
fileprivate func buildTree() {
    var queue = PriorityQueue<Node>(sort: { $0.count < $1.count })
    for node in tree where node.count > 0 {
        queue.enqueue(node)
    }

    while queue.count > 1, let node1 = queue.dequeue(), let node2 = queue.dequeue() {
        var parentNode = Node()
        parentNode.count = node1.count + node2.count
        parentNode.left = node1.index
        parentNode.right = node2.index
        // The parent is stored at the end of the array, so its index is the old count.
        parentNode.index = tree.count
        tree.append(parentNode)

        // The queue holds copies; update the originals inside `tree`.
        tree[node1.index].parent = parentNode.index
        tree[node2.index].parent = parentNode.index

        queue.enqueue(parentNode)
    }

    // Nothing to encode: avoid crashing on an empty queue.
    guard let rootNode = queue.dequeue() else { return }
    root = rootNode.index
}

现在咱们知道怎样从频率表构建紧缩树,咱们能够用它来紧缩NSData对象的内容。

/// Huffman-encodes `data` and returns the packed bit stream.
///
/// The input is scanned twice: once to count byte frequencies and build the tree,
/// and once to write the bit code of every byte. The final byte is padded by
/// `BitWriter.flush()`.
///
/// - Parameter data: The bytes to compress.
/// - Returns: The compressed bytes. The frequency table is not included in them.
public func compressData(data: NSData) -> NSData {
    countByteFrequency(inData: data)
    buildTree()

    let writer = BitWriter()
    let bytes = UnsafeBufferPointer(start: data.bytes.assumingMemoryBound(to: UInt8.self),
                                    count: data.length)
    for byte in bytes {
        // Leaf nodes live at the index equal to their byte value.
        traverseTree(writer: writer, nodeIndex: Int(byte), childIndex: -1)
    }
    writer.flush()
    return writer.data
}

注意:紧缩总是需求两次遍历整个输入数据:首先构建频率表,然后将字节转化为紧缩的位序列。

风趣的东西发生在traverseTree()中。这是一种递归办法:

/// Writes the bit code for the path from the root down to node `h`.
///
/// The function first recurses up to the parent, so bits are emitted starting at
/// the root. On the way back down, each node writes one bit describing which of its
/// children the path continues through: 1 for the left child, 0 for the right child.
///
/// - Parameters:
///   - writer: Receives the bits.
///   - h: Index of the node being visited.
///   - child: Index of the node the path came from, or -1 for the starting node
///     (which writes nothing).
private func traverseTree(writer: BitWriter, nodeIndex h: Int, childIndex child: Int) {
    if tree[h].parent != -1 {
        traverseTree(writer: writer, nodeIndex: tree[h].parent, childIndex: h)
    }
    if child != -1 {
        if child == tree[h].left {
            writer.writeBit(bit: true)
        } else if child == tree[h].right {
            writer.writeBit(bit: false)
        }
    }
}

像这样运用compressData()办法:

// Example: compress the UTF-8 bytes of a string and print the compressed size.
let s1 = "so much words wow many compression"
if let originalData = s1.data(using: .utf8) {
    let huffman1 = Huffman()
    // compressData(data:) takes NSData, so wrap the Data value.
    let compressedData = huffman1.compressData(data: NSData(data: originalData))
    print(compressedData.length)
}

解紧缩

首先需求一些办法将[Freq]数组转化回紧缩树:

/// Rebuilds the Huffman tree from a previously exported frequency table.
///
/// Each entry's count is copied into the `tree` slot for its byte value, then
/// `buildTree()` reconstructs the internal nodes and `root`.
fileprivate func restoreTree(fromTable frequencyTable: [Freq]) {
    for freq in frequencyTable {
        let i = Int(freq.byte)
        tree[i].count = freq.count
        tree[i].index = i
    }
    buildTree()
}
/// Decodes a Huffman bit stream back into the original bytes.
///
/// - Parameters:
///   - data: The compressed bit stream.
///   - frequencyTable: The table used to rebuild the tree that encoded `data`.
/// - Returns: The decompressed bytes; empty when the frequency table is empty.
func decompressData(data: NSData, frequencyTable: [Freq]) -> NSData {
    // An empty table means nothing was encoded; there is no tree to walk.
    guard !frequencyTable.isEmpty else { return NSData() }
    restoreTree(fromTable: frequencyTable)
    guard root != -1 else { return NSData() }

    let reader = BitReader(data: data)
    let outData = NSMutableData()
    // The root's count is the sum of all byte counts, i.e. the output length.
    // It also tells the loop when to stop, so padding bits are never decoded.
    let byteCount = tree[root].count

    for _ in 0..<byteCount {
        var b = findLeafNode(reader: reader, nodeIndex: root)
        outData.append(&b, length: 1)
    }
    return outData
}

也是运用辅佐办法遍历树

/// Walks down the tree from `nodeIndex`, consuming bits until a leaf is reached.
///
/// A 1 bit follows the left child and a 0 bit follows the right child. A node with
/// no right child (-1) is treated as a leaf.
///
/// - Returns: The index of the leaf, converted to a byte value.
private func findLeafNode(reader: BitReader, nodeIndex: Int) -> UInt8 {
    var h = nodeIndex
    while tree[h].right != -1 {
        if reader.readBit() {
            h = tree[h].left
        } else {
            h = tree[h].right
        }
    }
    return UInt8(h)
}

怎样运用解紧缩的办法:

// Example: decompress with a fresh Huffman instance and the exported table.
// `huffman1` and `compressedData` come from the compression example, so these
// lines belong inside the same scope as that code.
let frequencyTable = huffman1.frequencyTable()
let huffman2 = Huffman()
let decompressedData = huffman2.decompressData(data: compressedData, frequencyTable: frequencyTable)
// Decode the bytes as UTF-8 text without force-unwrapping.
let s2 = String(decoding: Data(referencing: decompressedData), as: UTF8.self)