1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
|
def count_frequency(text): chars = [] ret = [] for char in text: if char in chars: continue else: chars.append(char) ret.append((char, text.count(char))) return ret
class Node: def __init__(self, frequency): self.left = None self.right = None self.father = None self.frequency = frequency def is_left(self): return self.father.left == self
def create_nodes(frequency_list): return [Node(frequency) for frequency in frequency_list]
def create_huffman_tree(nodes): queue = nodes[:] while len(queue) > 1: queue.sort(key=lambda item: item.frequency) node_left = queue.pop(0) node_right = queue.pop(0) node_father = Node(node_left.frequency + node_right.frequency) node_father.left = node_left node_father.right = node_right node_left.father = node_father node_right.father = node_father queue.append(node_father) queue[0].father = None return queue[0]
def huffman_encoding(nodes, root): huffman_code = [''] * len(nodes) for i in range(len(nodes)): node = nodes[i] while node != root: if node.is_left(): huffman_code[i] = '0' + huffman_code[i] else: huffman_code[i] = '1' + huffman_code[i] node = node.father return huffman_code
def encode_str(text, char_frequency, codes): ret = '' for char in text: i = 0 for item in char_frequency: if char == item[0]: ret += codes[i] i += 1 return ret
def decode_str(huffman_str, char_frequency, codes): ret = '' while huffman_str != '': i = 0 for item in codes: if item in huffman_str and huffman_str.index(item) == 0: ret += char_frequency[i][0] huffman_str = huffman_str[len(item):] i += 1 return ret if __name__ == '__main__': char_frequency = [('j', 29), ('z', 31), ('7', 25), ('e', 31), ('l', 23), ('6', 37), ('4', 32), ('p', 38), ('h', 27), ('g', 26), ('x', 28), ('i', 25), ('u', 27), ('n', 25), ('8', 36), ('0', 24), ('o', 23), ('c', 28), ('y', 24), ('1', 29), ('b', 26), ('m', 27), ('2', 28), ('v', 25), ('d', 33), ('f', 28), ('9', 33), ('t', 21), ('w', 22), ('a', 31), ('r', 24), ('s', 16), ('k', 32), ('5', 25), ('q', 23), ('3', 32), ('{', 1), ('-', 4), ('}', 1)] nodes = create_nodes([item[1] for item in char_frequency]) root = create_huffman_tree(nodes) codes = huffman_encoding(nodes, root) huffman_str = '0111110001000011001010001111011110101010011011011110100000110010111101000010010010001100001110010000011110011101101111011001111101000000111010100000101101001000111100000000010100110100101001011101110010001100011100010010111001100011100110011010011000101010100011011110001111111110111001011100010100101111100001011011001001001000010111110101110111010111100010111011000011001011001101001010010111111001110101000110001001001100101110111101111000110010010111111000111110000101001100100100001001110100101011111101111110011101011101000000100100100011111111001000101110101001001101110001011101101001001001011010000101111111001011111100110010100111111110001001100100010010010011110111110110110001101000010010110110001011010000100011010111110101110000110000010001111111110000101000100101101111000111100101101011001100010101011000110010011111001010011110100100011000101111110111011011000011011010100011011100010001010001010000000001101001010010100111111010010110110011110100101010010101001010100010101011010011110001000011000100001010111001110001100101100001010111011110110111110000001011011111011101101000111111110100111100110011101111100111100101101101101010100110001100100110101011110000011111111100011110011101010011110101010111100111100001000111110111110100010011110011000010000100001100101111101010110101100011100010010100001110001001010110010010010100010101101101001110000101111110101010110110110000010011000111000010001001101101101101100111000011000011010101111010101100101000011011001011000101101110100011110001100111101111011000100110110000111010101101111101001111111111100001000111000001001011111011110010110101011110001110001101010011000101111100001111111011100110101001000011111101111111011001111110001110111110110010111000111011011110010101010110011001110110011110001111010000011010101000111110111011100101100100100100001111101010011101111100110011100000010100101000111100100011001011111000000111111111000000011111111101110111111001110100100000100000011011111010000000011110101110111101101011001111011010101111000010110001101000111000111000001110110111000100011110101100100100011100111100101101010010110101011111110011100100000111011011010101101110111000001001100110111001000111001000000111000110010110000100100010001001111010101000101101111000000110101110011101001011100110111101101111100001111000110001101010000111100100011110001100110111001101011100010101011110111111111100101100101010001101110101101101010101001110100001101011000100001111011011100101011000001001000011011000111011101110011001101110100000010100000101111010000001000011001101101111010011101000000101101101011101001101110000010011110001110100111000101111101010110111010011010011000011000110110010110001001000101101111000010010001011110100010111010100101100101111010100001110111100000100101101011110010110001000111111001000000101110010111010001101101111101110111000010101100100001010101001010010001011101001100101010101001111110000010011010011110101001001001110010110100111011110110000111101000010011111000111111001111010011101011010011100010001111101001110011110101111111111111011010100000010100010010011110100110011011101011101011101100000100111110111100100000101011000110110000010110001001111111111011101011000010101111110111001011101111111100111011101001000111011110110111101001011110011000110011000010011011001001100010010111110000110100001110111100110110100101010010111001001100101111010010001001111111000010111101010110000001110101000111011010111100110101001001110001110001001111110001000010011011110100111011111000101111110011000011010001000101000110011100011001001011000111011100101101000110001110011011101010101001010011101110100100111101011101010011010101010111101110101101000001111100111111010011010111101000101011111101011100101101101001100011001111101111100100111101101101110111111010111010100100101110111000011100001001000011100010101110100111110011001100101111110110100111101000010001000011011110000011010110111010110001110011111110000011110010001011010010111111101010101110010000001010011111011100101000101101010101101101000101000110011101101010110001100101011101110111100000001010000011110011010011000011111110100111011100100111000001101001110111100000101010110000010000100001110111000011111110010010100111111101010110000000000111011010000101100100111001110000001011101100000110110101011001011000111001111110010101111001011011101000010001100011101110010100111000011111001110001100111110110111101010101011001000101011010001100000010001111110011001101111111010110010001111001100111110001110011100010011011100100010011011000110000100101111100111110111101010010001101010011100110001011001111000100011011110100011101011101010111111110000011110110111011110000010111100110011100011010111101111110100000010001111100101100011110001101011111101111111011111011101010001101001000111000101111110101000110011000111011111101111110100001111011110010100011101110111111010101100111000101100100010011101001011110011111001111101110001110111111011100111100010110010011011010100011100101010101010110000001010111001101111100111110010100111000010101111001110011011011111001101110011111001000000000111101011000111110001101010011011000010100100100111011111110010000000101001111111110101100001010000001110100101001111001011011001001001011100101111110' origin_str = decode_str(huffman_str, char_frequency, codes) print 'Encode result:' + huffman_str print 'Decode result:' + origin_str
|