#include #include #include "huffman.h" // count the weights of characters in the file filename void count_weights(char *filename, int *weights){ // store last character read char ch = 0; // open file for reading FILE * fptr = NULL; fptr = fopen(filename, "r"); if (fptr == NULL){ // check for error printf("File name error.\n"); return; } // count frequency of each ascii character while((ch = fgetc(fptr)) != EOF){ weights[(int)ch]++; } } int build_list(Huff_Node **head, int *weights){ Huff_Node *node = NULL; int weight_total = 0; int i = 0; for(i = 0; i < ASCII_SIZE; i++){ weight_total = weight_total + weights[i]; if(weights[i] > 0){ node = create_huff_node(i, weights[i]); add_list_node(head, node); } } return weight_total; } // creates a new node Huff_Node * create_huff_node(char ascii, int weight){ Huff_Node *node = malloc(sizeof(Huff_Node)); node->ascii = ascii; node->weight = weight; node->next = NULL; node->left = NULL; node->right = NULL; return node; } // adds node to list in ascending order by weight void add_list_node(Huff_Node **headp, Huff_Node *node){ Huff_Node *cur = NULL; if(*headp == NULL || node->weight >= (*headp)->weight){ node->next = *headp; *headp = node; } else{ cur = *headp; while(cur->next != NULL && cur->next->weight > node->weight){ cur = cur->next; } node->next = cur->next; cur->next = node; } } // gets the first element from the list Huff_Node * remove_min_node(Huff_Node *head){ Huff_Node *node = head; Huff_Node *retval = head; while(node->next != NULL){ retval = node; node = node->next; } retval->next = NULL; retval = node; return retval; } // prints the list values in order void print_list(Huff_Node *head){ Huff_Node *cur = head; while(cur != NULL){ printf("ascii: %c\tweight: %d\n", cur->ascii, cur->weight); cur = cur->next; } } // generate binary tree with weighted parent nodes Huff_Node * gen_huff_tree(Huff_Node *head, int weight_total){ Huff_Node *parent = NULL; while(head->weight < weight_total){ parent = create_huff_node(NO_CHAR, 0); parent->left = remove_min_node(head); parent->right = remove_min_node(head); parent->weight = parent->left->weight + parent->right->weight; add_list_node(&head, parent); } return head; } // prints the tree values in-order void print_tree(Huff_Node * root){ if(root == NULL){ return; } print_tree(root->left); if(root->ascii >= 0){ printf("ascii: %c\tweight: %d\n", root->ascii, root->weight); } print_tree(root->right); } // calculates the height of the left and right subtrees and returns the larger value (to get the total height) int get_tree_height(Huff_Node *root){ return get_tree_height_rh(root, 0); } // recursive helper function for get_tree_height int get_tree_height_rh(Huff_Node *root, int height){ // stop recursio- if(root == NULL){ return height; } // recurse int left_height = get_tree_height_rh(root->left, height + 1); int right_height = get_tree_height_rh(root->right, height + 1); // return the larger value if(left_height < right_height){ return right_height; } return left_height; } // counts the number of leaf nodes in the tree root int get_num_leaves(Huff_Node *root){ int leaves = 0; get_num_leaves_rh(root, &leaves); return leaves; } // recursive helper function for get_num_leaves void get_num_leaves_rh(Huff_Node *root, int *leaves){ if(root == NULL){ return; } if(root->ascii >= 0){ (*leaves)++; return; } get_num_leaves_rh(root->left, leaves); get_num_leaves_rh(root->right, leaves); } // generate the codebook and return number of characters void gen_codes(Huff_Node * root, int ** codebook){ int row = 0; gen_codes_rh(root, codebook, &row, 1); } // recursive helper function to generate the codebook void gen_codes_rh(Huff_Node *root, int ** codebook, int * row, int col){ int numRow = 0; int i = 0; // recurse through tree if(root == NULL){ return; } Huff_Node * left_child = root->left; Huff_Node * right_child = root->right; if(root->ascii >= 0){ // this node is a leaf node codebook[*row][0] = root->ascii; (*row)++; return; } if(left_child != NULL){ numRow = get_num_leaves(left_child); for(i = *row; i < (*row) + numRow; i++){ codebook[i][col] = 0; } gen_codes_rh(left_child, codebook, row, col + 1); } if(right_child != NULL){ numRow = get_num_leaves(right_child); for(i = *row; i < (*row) + numRow; i++){ codebook[i][col] = 1; } gen_codes_rh(right_child, codebook, row, col + 1); } } // print the codes to stdout in a human-readable format void print_codes(int ** codebook, int numRow){ int row = 0; for(row = 0; row < numRow; row++){ printf("ascii: %c\t", codebook[row][0]); int col = 1; printf("code: "); while(codebook[row][col] != -1){ printf("%d", codebook[row][col]); col++; } printf("\n"); } } void write_bit(FILE *fptr, char bit, unsigned char *bit_pos, unsigned char *cur_byte){ if((*bit_pos) == 0){ *cur_byte = 0; } unsigned char shifted_bit = bit << (ASCII_BYTE_SIZE - 1 - (*bit_pos)); *cur_byte |= shifted_bit; if((*bit_pos) == ASCII_BYTE_SIZE - 1){ fputc(*cur_byte, fptr); } *bit_pos = ((*bit_pos) + 1) % ASCII_BYTE_SIZE; } void read_bit(FILE *fptr, unsigned char *bit, unsigned char *bit_pos, unsigned char * cur_byte){ if((*bit_pos) == 0){ if(fread(cur_byte, ASCII_BYTE_SIZE, 1, fptr) != 1){ return; } } unsigned char shifted_bit = (*cur_byte) >> (ASCII_BYTE_SIZE - 1 - (*bit_pos)); shifted_bit = shifted_bit & 0x01; (*bit_pos) = ((*bit_pos) + 1) % ASCII_BYTE_SIZE; *bit = shifted_bit; return; } void write_file_header(FILE *fptr, int *weights){ int i = 0; for(i = 0; i < ASCII_SIZE; i++){ if(weights[i] > 0){ fprintf(fptr, "%c%d", i, weights[i]); } } fprintf(fptr, "\n"); } Huff_Node * read_file_header(FILE *fptr){ Huff_Node *head = NULL; //Huff_Node *root = NULL; char ascii = 0; int weight = 0; int weights[ASCII_SIZE] = {0}; int weight_total = 0; while(fscanf(fptr, "%c%d", &ascii, &weight) > 1 && ascii != '\n'){ printf("%c%d", ascii, weight); weights[(int)ascii] = weight; weight_total = weight_total + weight; } build_list(&head, weights); head = gen_huff_tree(head, weight_total); return head; } void write_file_header_bits(FILE *fptr, Huff_Node *root, int weight_total, unsigned int num_leaves){ unsigned char bit_pos = 0; unsigned char cur_byte = 0; fwrite(&weight_total, sizeof(int), 1, fptr); write_file_header_bits_rh(fptr, root, &bit_pos, &cur_byte); write_bit(fptr, 0, &bit_pos, &cur_byte); // pad zeros while(bit_pos != 0){ write_bit(fptr, 0, &bit_pos, &cur_byte); } fwrite(&num_leaves, sizeof(unsigned int), 1, fptr); unsigned char line_break = '\n'; fwrite(&line_break, sizeof(unsigned int), 1, fptr); } void write_file_header_bits_rh(FILE *fptr, Huff_Node *root, unsigned char *bit_pos, unsigned char *cur_byte){ if(root == NULL){ printf("Error."); } Huff_Node *left = root->left; Huff_Node *right = root->right; if((left == NULL) && (right == NULL)){ write_bit(fptr, 1, bit_pos, cur_byte); write_ascii_bits(fptr, root->ascii, bit_pos, cur_byte); return; } write_file_header_bits_rh(fptr, left, bit_pos, cur_byte); write_file_header_bits_rh(fptr, right, bit_pos, cur_byte); write_bit(fptr, 0, bit_pos, cur_byte); } void write_ascii_bits(FILE *fptr, char ascii, unsigned char *bit_pos, unsigned char *cur_byte){ unsigned char mask = 0x40; while(mask > 0){ write_bit(fptr, (ascii & mask) == mask, bit_pos, cur_byte); mask >>= 1; } } void map_codebook(int ** codebook, int size, int * map){ int i = 0; int j = 0; for(i = 0; i < ASCII_SIZE; i++){ map[i] = -1; for(j = 0; j < size; j++){ if(codebook[j][0] == i){ map[i] = j; } } } } void write_file_data(FILE *fptr_out, FILE *fptr_in, int ** codebook, int * map){ char ch = 0; int ascii_i = 0; int code_i = 0; while((ch = fgetc(fptr_in)) != EOF){ ascii_i = map[(int)ch]; code_i = 1; while(codebook[ascii_i][code_i] != -1){ fprintf(fptr_out, "%d", codebook[ascii_i][code_i]); code_i++; } } } char * read_file_data(FILE *fptr, Huff_Node *root){ ssize_t read = 0; //int ch = 0; char * line = NULL; int linec = 1; size_t len = 0; while ((read = getline(&line, &len, fptr)) != -1) { //printf("Retrieved line of length %zu :\n", read); if(linec > 1){ return line; } linec++; } return line; } void write_encoded_message(char * data, Huff_Node *root, FILE *fptr){ Huff_Node *cur = root; char ch = ' '; int i = 0; while(ch != '\0'){ if(cur->ascii >= 0){ fprintf(fptr, "%c", cur->ascii); cur = root; } ch = data[i]; if(ch == '0'){ cur = cur->left; } if(ch == '1'){ cur = cur->right; } i++; } if(root == NULL){ return; } } int main (int argc, char **argv){ if(argc != 2){ printf("Invalid number of arguments.\n"); return EXIT_FAILURE; } char input[256]; strcpy(input, argv[1]); strcat(argv[1], ".unhuff"); FILE *fptr = fopen(input, "r"); Huff_Node *root = read_file_header(fptr); rewind(fptr); char * data = read_file_data(fptr, root); FILE *out = fopen(argv[1], "w"); write_encoded_message(data, root, out); fclose(fptr); fclose(out); return EXIT_SUCCESS; }