unhuff.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include "huffman.h"
  4. // count the weights of characters in the file filename
  5. void count_weights(char *filename, int *weights){
  6. // store last character read
  7. char ch = 0;
  8. // open file for reading
  9. FILE * fptr = NULL;
  10. fptr = fopen(filename, "r");
  11. if (fptr == NULL){ // check for error
  12. printf("File name error.\n");
  13. return;
  14. }
  15. // count frequency of each ascii character
  16. while((ch = fgetc(fptr)) != EOF){
  17. weights[(int)ch]++;
  18. }
  19. }
  20. int build_list(Huff_Node **head, int *weights){
  21. Huff_Node *node = NULL;
  22. int weight_total = 0;
  23. int i = 0;
  24. for(i = 0; i < ASCII_SIZE; i++){
  25. weight_total = weight_total + weights[i];
  26. if(weights[i] > 0){
  27. node = create_huff_node(i, weights[i]);
  28. add_list_node(head, node);
  29. }
  30. }
  31. return weight_total;
  32. }
  33. // creates a new node
  34. Huff_Node * create_huff_node(char ascii, int weight){
  35. Huff_Node *node = malloc(sizeof(Huff_Node));
  36. node->ascii = ascii;
  37. node->weight = weight;
  38. node->next = NULL;
  39. node->left = NULL;
  40. node->right = NULL;
  41. return node;
  42. }
  43. // adds node to list in ascending order by weight
  44. void add_list_node(Huff_Node **headp, Huff_Node *node){
  45. Huff_Node *cur = NULL;
  46. if(*headp == NULL || node->weight >= (*headp)->weight){
  47. node->next = *headp;
  48. *headp = node;
  49. }
  50. else{
  51. cur = *headp;
  52. while(cur->next != NULL && cur->next->weight > node->weight){
  53. cur = cur->next;
  54. }
  55. node->next = cur->next;
  56. cur->next = node;
  57. }
  58. }
  59. // gets the first element from the list
  60. Huff_Node * remove_min_node(Huff_Node *head){
  61. Huff_Node *node = head;
  62. Huff_Node *retval = head;
  63. while(node->next != NULL){
  64. retval = node;
  65. node = node->next;
  66. }
  67. retval->next = NULL;
  68. retval = node;
  69. return retval;
  70. }
  71. // prints the list values in order
  72. void print_list(Huff_Node *head){
  73. Huff_Node *cur = head;
  74. while(cur != NULL){
  75. printf("ascii: %c\tweight: %d\n", cur->ascii, cur->weight);
  76. cur = cur->next;
  77. }
  78. }
  79. // generate binary tree with weighted parent nodes
  80. Huff_Node * gen_huff_tree(Huff_Node *head, int weight_total){
  81. Huff_Node *parent = NULL;
  82. while(head->weight < weight_total){
  83. parent = create_huff_node(NO_CHAR, 0);
  84. parent->left = remove_min_node(head);
  85. parent->right = remove_min_node(head);
  86. parent->weight = parent->left->weight + parent->right->weight;
  87. add_list_node(&head, parent);
  88. }
  89. return head;
  90. }
  91. // prints the tree values in-order
  92. void print_tree(Huff_Node * root){
  93. if(root == NULL){
  94. return;
  95. }
  96. print_tree(root->left);
  97. if(root->ascii >= 0){
  98. printf("ascii: %c\tweight: %d\n", root->ascii, root->weight);
  99. }
  100. print_tree(root->right);
  101. }
  102. // calculates the height of the left and right subtrees and returns the larger value (to get the total height)
  103. int get_tree_height(Huff_Node *root){
  104. return get_tree_height_rh(root, 0);
  105. }
  106. // recursive helper function for get_tree_height
  107. int get_tree_height_rh(Huff_Node *root, int height){
  108. // stop recursio-
  109. if(root == NULL){
  110. return height;
  111. }
  112. // recurse
  113. int left_height = get_tree_height_rh(root->left, height + 1);
  114. int right_height = get_tree_height_rh(root->right, height + 1);
  115. // return the larger value
  116. if(left_height < right_height){
  117. return right_height;
  118. }
  119. return left_height;
  120. }
  121. // counts the number of leaf nodes in the tree root
  122. int get_num_leaves(Huff_Node *root){
  123. int leaves = 0;
  124. get_num_leaves_rh(root, &leaves);
  125. return leaves;
  126. }
  127. // recursive helper function for get_num_leaves
  128. void get_num_leaves_rh(Huff_Node *root, int *leaves){
  129. if(root == NULL){
  130. return;
  131. }
  132. if(root->ascii >= 0){
  133. (*leaves)++;
  134. return;
  135. }
  136. get_num_leaves_rh(root->left, leaves);
  137. get_num_leaves_rh(root->right, leaves);
  138. }
  139. // generate the codebook and return number of characters
  140. void gen_codes(Huff_Node * root, int ** codebook){
  141. int row = 0;
  142. gen_codes_rh(root, codebook, &row, 1);
  143. }
  144. // recursive helper function to generate the codebook
  145. void gen_codes_rh(Huff_Node *root, int ** codebook, int * row, int col){
  146. int numRow = 0;
  147. int i = 0;
  148. // recurse through tree
  149. if(root == NULL){
  150. return;
  151. }
  152. Huff_Node * left_child = root->left;
  153. Huff_Node * right_child = root->right;
  154. if(root->ascii >= 0){ // this node is a leaf node
  155. codebook[*row][0] = root->ascii;
  156. (*row)++;
  157. return;
  158. }
  159. if(left_child != NULL){
  160. numRow = get_num_leaves(left_child);
  161. for(i = *row; i < (*row) + numRow; i++){
  162. codebook[i][col] = 0;
  163. }
  164. gen_codes_rh(left_child, codebook, row, col + 1);
  165. }
  166. if(right_child != NULL){
  167. numRow = get_num_leaves(right_child);
  168. for(i = *row; i < (*row) + numRow; i++){
  169. codebook[i][col] = 1;
  170. }
  171. gen_codes_rh(right_child, codebook, row, col + 1);
  172. }
  173. }
  174. // print the codes to stdout in a human-readable format
  175. void print_codes(int ** codebook, int numRow){
  176. int row = 0;
  177. for(row = 0; row < numRow; row++){
  178. printf("ascii: %c\t", codebook[row][0]);
  179. int col = 1;
  180. printf("code: ");
  181. while(codebook[row][col] != -1){
  182. printf("%d", codebook[row][col]);
  183. col++;
  184. }
  185. printf("\n");
  186. }
  187. }
  188. void write_bit(FILE *fptr, char bit, unsigned char *bit_pos, unsigned char *cur_byte){
  189. if((*bit_pos) == 0){
  190. *cur_byte = 0;
  191. }
  192. unsigned char shifted_bit = bit << (ASCII_BYTE_SIZE - 1 - (*bit_pos));
  193. *cur_byte |= shifted_bit;
  194. if((*bit_pos) == ASCII_BYTE_SIZE - 1){
  195. fputc(*cur_byte, fptr);
  196. }
  197. *bit_pos = ((*bit_pos) + 1) % ASCII_BYTE_SIZE;
  198. }
  199. void read_bit(FILE *fptr, unsigned char *bit, unsigned char *bit_pos, unsigned char * cur_byte){
  200. if((*bit_pos) == 0){
  201. if(fread(cur_byte, ASCII_BYTE_SIZE, 1, fptr) != 1){
  202. return;
  203. }
  204. }
  205. unsigned char shifted_bit = (*cur_byte) >> (ASCII_BYTE_SIZE - 1 - (*bit_pos));
  206. shifted_bit = shifted_bit & 0x01;
  207. (*bit_pos) = ((*bit_pos) + 1) % ASCII_BYTE_SIZE;
  208. *bit = shifted_bit;
  209. return;
  210. }
  211. void write_file_header(FILE *fptr, int *weights){
  212. int i = 0;
  213. for(i = 0; i < ASCII_SIZE; i++){
  214. if(weights[i] > 0){
  215. fprintf(fptr, "%c%d", i, weights[i]);
  216. }
  217. }
  218. fprintf(fptr, "\n");
  219. }
  220. Huff_Node * read_file_header(FILE *fptr){
  221. Huff_Node *head = NULL;
  222. //Huff_Node *root = NULL;
  223. char ascii = 0;
  224. int weight = 0;
  225. int weights[ASCII_SIZE] = {0};
  226. int weight_total = 0;
  227. while(fscanf(fptr, "%c%d", &ascii, &weight) > 1 && ascii != '\n'){
  228. printf("%c%d", ascii, weight);
  229. weights[(int)ascii] = weight;
  230. weight_total = weight_total + weight;
  231. }
  232. build_list(&head, weights);
  233. head = gen_huff_tree(head, weight_total);
  234. return head;
  235. }
  236. void write_file_header_bits(FILE *fptr, Huff_Node *root, int weight_total, unsigned int num_leaves){
  237. unsigned char bit_pos = 0;
  238. unsigned char cur_byte = 0;
  239. fwrite(&weight_total, sizeof(int), 1, fptr);
  240. write_file_header_bits_rh(fptr, root, &bit_pos, &cur_byte);
  241. write_bit(fptr, 0, &bit_pos, &cur_byte);
  242. // pad zeros
  243. while(bit_pos != 0){
  244. write_bit(fptr, 0, &bit_pos, &cur_byte);
  245. }
  246. fwrite(&num_leaves, sizeof(unsigned int), 1, fptr);
  247. unsigned char line_break = '\n';
  248. fwrite(&line_break, sizeof(unsigned int), 1, fptr);
  249. }
  250. void write_file_header_bits_rh(FILE *fptr, Huff_Node *root, unsigned char *bit_pos, unsigned char *cur_byte){
  251. if(root == NULL){
  252. printf("Error.");
  253. }
  254. Huff_Node *left = root->left;
  255. Huff_Node *right = root->right;
  256. if((left == NULL) && (right == NULL)){
  257. write_bit(fptr, 1, bit_pos, cur_byte);
  258. write_ascii_bits(fptr, root->ascii, bit_pos, cur_byte);
  259. return;
  260. }
  261. write_file_header_bits_rh(fptr, left, bit_pos, cur_byte);
  262. write_file_header_bits_rh(fptr, right, bit_pos, cur_byte);
  263. write_bit(fptr, 0, bit_pos, cur_byte);
  264. }
  265. void write_ascii_bits(FILE *fptr, char ascii, unsigned char *bit_pos, unsigned char *cur_byte){
  266. unsigned char mask = 0x40;
  267. while(mask > 0){
  268. write_bit(fptr, (ascii & mask) == mask, bit_pos, cur_byte);
  269. mask >>= 1;
  270. }
  271. }
  272. void map_codebook(int ** codebook, int size, int * map){
  273. int i = 0;
  274. int j = 0;
  275. for(i = 0; i < ASCII_SIZE; i++){
  276. map[i] = -1;
  277. for(j = 0; j < size; j++){
  278. if(codebook[j][0] == i){
  279. map[i] = j;
  280. }
  281. }
  282. }
  283. }
  284. void write_file_data(FILE *fptr_out, FILE *fptr_in, int ** codebook, int * map){
  285. char ch = 0;
  286. int ascii_i = 0;
  287. int code_i = 0;
  288. while((ch = fgetc(fptr_in)) != EOF){
  289. ascii_i = map[(int)ch];
  290. code_i = 1;
  291. while(codebook[ascii_i][code_i] != -1){
  292. fprintf(fptr_out, "%d", codebook[ascii_i][code_i]);
  293. code_i++;
  294. }
  295. }
  296. }
  297. char * read_file_data(FILE *fptr, Huff_Node *root){
  298. ssize_t read = 0;
  299. //int ch = 0;
  300. char * line = NULL;
  301. int linec = 1;
  302. size_t len = 0;
  303. while ((read = getline(&line, &len, fptr)) != -1) {
  304. //printf("Retrieved line of length %zu :\n", read);
  305. if(linec > 1){
  306. return line;
  307. }
  308. linec++;
  309. }
  310. return line;
  311. }
  312. void write_encoded_message(char * data, Huff_Node *root, FILE *fptr){
  313. Huff_Node *cur = root;
  314. char ch = ' ';
  315. int i = 0;
  316. while(ch != '\0'){
  317. if(cur->ascii >= 0){
  318. fprintf(fptr, "%c", cur->ascii);
  319. cur = root;
  320. }
  321. ch = data[i];
  322. if(ch == '0'){
  323. cur = cur->left;
  324. }
  325. if(ch == '1'){
  326. cur = cur->right;
  327. }
  328. i++;
  329. }
  330. if(root == NULL){
  331. return;
  332. }
  333. }
  334. int main (int argc, char **argv){
  335. if(argc != 2){
  336. printf("Invalid number of arguments.\n");
  337. return EXIT_FAILURE;
  338. }
  339. char input[256];
  340. strcpy(input, argv[1]);
  341. strcat(argv[1], ".unhuff");
  342. FILE *fptr = fopen(input, "r");
  343. Huff_Node *root = read_file_header(fptr);
  344. rewind(fptr);
  345. char * data = read_file_data(fptr, root);
  346. FILE *out = fopen(argv[1], "w");
  347. write_encoded_message(data, root, out);
  348. fclose(fptr);
  349. fclose(out);
  350. return EXIT_SUCCESS;
  351. }