| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- #include <stdio.h>
- #include <stdlib.h>
- #include "huffman.h"
- // count the weights of characters in the file filename
- void count_weights(char *filename, int *weights){
- // store last character read
- char ch = 0;
- // open file for reading
- FILE * fptr = NULL;
- fptr = fopen(filename, "r");
- if (fptr == NULL){ // check for error
- printf("File name error.\n");
- return;
- }
- // count frequency of each ascii character
- while((ch = fgetc(fptr)) != EOF){
- weights[(int)ch]++;
- }
- }
- int build_list(Huff_Node **head, int *weights){
- Huff_Node *node = NULL;
- int weight_total = 0;
- int i = 0;
- for(i = 0; i < ASCII_SIZE; i++){
- weight_total = weight_total + weights[i];
- if(weights[i] > 0){
- node = create_huff_node(i, weights[i]);
- add_list_node(head, node);
- }
- }
- return weight_total;
- }
- // creates a new node
- Huff_Node * create_huff_node(char ascii, int weight){
- Huff_Node *node = malloc(sizeof(Huff_Node));
- node->ascii = ascii;
- node->weight = weight;
- node->next = NULL;
- node->left = NULL;
- node->right = NULL;
- return node;
- }
- // adds node to list in ascending order by weight
- void add_list_node(Huff_Node **headp, Huff_Node *node){
- Huff_Node *cur = NULL;
- if(*headp == NULL || node->weight >= (*headp)->weight){
- node->next = *headp;
- *headp = node;
- }
- else{
- cur = *headp;
- while(cur->next != NULL && cur->next->weight > node->weight){
- cur = cur->next;
- }
- node->next = cur->next;
- cur->next = node;
- }
- }
- // gets the first element from the list
- Huff_Node * remove_min_node(Huff_Node *head){
- Huff_Node *node = head;
- Huff_Node *retval = head;
- while(node->next != NULL){
- retval = node;
- node = node->next;
- }
- retval->next = NULL;
- retval = node;
- return retval;
- }
- // prints the list values in order
- void print_list(Huff_Node *head){
- Huff_Node *cur = head;
- while(cur != NULL){
- printf("ascii: %c\tweight: %d\n", cur->ascii, cur->weight);
- cur = cur->next;
- }
- }
- // generate binary tree with weighted parent nodes
- Huff_Node * gen_huff_tree(Huff_Node *head, int weight_total){
- Huff_Node *parent = NULL;
- while(head->weight < weight_total){
- parent = create_huff_node(NO_CHAR, 0);
- parent->left = remove_min_node(head);
- parent->right = remove_min_node(head);
- parent->weight = parent->left->weight + parent->right->weight;
- add_list_node(&head, parent);
- }
- return head;
- }
- // prints the tree values in-order
- void print_tree(Huff_Node * root){
- if(root == NULL){
- return;
- }
- print_tree(root->left);
- if(root->ascii >= 0){
- printf("ascii: %c\tweight: %d\n", root->ascii, root->weight);
- }
- print_tree(root->right);
- }
- // calculates the height of the left and right subtrees and returns the larger value (to get the total height)
- int get_tree_height(Huff_Node *root){
- return get_tree_height_rh(root, 0);
- }
- // recursive helper function for get_tree_height
- int get_tree_height_rh(Huff_Node *root, int height){
- // stop recursio-
- if(root == NULL){
- return height;
- }
- // recurse
- int left_height = get_tree_height_rh(root->left, height + 1);
- int right_height = get_tree_height_rh(root->right, height + 1);
- // return the larger value
- if(left_height < right_height){
- return right_height;
- }
- return left_height;
- }
- // counts the number of leaf nodes in the tree root
- int get_num_leaves(Huff_Node *root){
- int leaves = 0;
- get_num_leaves_rh(root, &leaves);
- return leaves;
- }
- // recursive helper function for get_num_leaves
- void get_num_leaves_rh(Huff_Node *root, int *leaves){
- if(root == NULL){
- return;
- }
- if(root->ascii >= 0){
- (*leaves)++;
- return;
- }
- get_num_leaves_rh(root->left, leaves);
- get_num_leaves_rh(root->right, leaves);
- }
- // generate the codebook and return number of characters
- void gen_codes(Huff_Node * root, int ** codebook){
- int row = 0;
- gen_codes_rh(root, codebook, &row, 1);
- }
- // recursive helper function to generate the codebook
- void gen_codes_rh(Huff_Node *root, int ** codebook, int * row, int col){
- int numRow = 0;
- int i = 0;
- // recurse through tree
- if(root == NULL){
- return;
- }
- Huff_Node * left_child = root->left;
- Huff_Node * right_child = root->right;
- if(root->ascii >= 0){ // this node is a leaf node
- codebook[*row][0] = root->ascii;
- (*row)++;
- return;
- }
- if(left_child != NULL){
- numRow = get_num_leaves(left_child);
- for(i = *row; i < (*row) + numRow; i++){
- codebook[i][col] = 0;
- }
- gen_codes_rh(left_child, codebook, row, col + 1);
- }
- if(right_child != NULL){
- numRow = get_num_leaves(right_child);
- for(i = *row; i < (*row) + numRow; i++){
- codebook[i][col] = 1;
- }
- gen_codes_rh(right_child, codebook, row, col + 1);
- }
- }
- // print the codes to stdout in a human-readable format
- void print_codes(int ** codebook, int numRow){
- int row = 0;
- for(row = 0; row < numRow; row++){
- printf("ascii: %c\t", codebook[row][0]);
- int col = 1;
- printf("code: ");
- while(codebook[row][col] != -1){
- printf("%d", codebook[row][col]);
- col++;
- }
- printf("\n");
- }
- }
- void write_bit(FILE *fptr, char bit, unsigned char *bit_pos, unsigned char *cur_byte){
- if((*bit_pos) == 0){
- *cur_byte = 0;
- }
- unsigned char shifted_bit = bit << (ASCII_BYTE_SIZE - 1 - (*bit_pos));
- *cur_byte |= shifted_bit;
- if((*bit_pos) == ASCII_BYTE_SIZE - 1){
- fputc(*cur_byte, fptr);
- }
- *bit_pos = ((*bit_pos) + 1) % ASCII_BYTE_SIZE;
- }
- void read_bit(FILE *fptr, unsigned char *bit, unsigned char *bit_pos, unsigned char * cur_byte){
- if((*bit_pos) == 0){
- if(fread(cur_byte, ASCII_BYTE_SIZE, 1, fptr) != 1){
- return;
- }
- }
- unsigned char shifted_bit = (*cur_byte) >> (ASCII_BYTE_SIZE - 1 - (*bit_pos));
- shifted_bit = shifted_bit & 0x01;
- (*bit_pos) = ((*bit_pos) + 1) % ASCII_BYTE_SIZE;
- *bit = shifted_bit;
- return;
- }
- void write_file_header(FILE *fptr, int *weights){
- int i = 0;
- for(i = 0; i < ASCII_SIZE; i++){
- if(weights[i] > 0){
- fprintf(fptr, "%c%d", i, weights[i]);
- }
- }
- fprintf(fptr, "\n");
- }
- Huff_Node * read_file_header(FILE *fptr){
- Huff_Node *head = NULL;
- //Huff_Node *root = NULL;
- char ascii = 0;
- int weight = 0;
- int weights[ASCII_SIZE] = {0};
- int weight_total = 0;
- while(fscanf(fptr, "%c%d", &ascii, &weight) > 1 && ascii != '\n'){
- printf("%c%d", ascii, weight);
- weights[(int)ascii] = weight;
- weight_total = weight_total + weight;
- }
- build_list(&head, weights);
- head = gen_huff_tree(head, weight_total);
- return head;
- }
- void write_file_header_bits(FILE *fptr, Huff_Node *root, int weight_total, unsigned int num_leaves){
- unsigned char bit_pos = 0;
- unsigned char cur_byte = 0;
- fwrite(&weight_total, sizeof(int), 1, fptr);
- write_file_header_bits_rh(fptr, root, &bit_pos, &cur_byte);
- write_bit(fptr, 0, &bit_pos, &cur_byte);
- // pad zeros
- while(bit_pos != 0){
- write_bit(fptr, 0, &bit_pos, &cur_byte);
- }
- fwrite(&num_leaves, sizeof(unsigned int), 1, fptr);
- unsigned char line_break = '\n';
- fwrite(&line_break, sizeof(unsigned int), 1, fptr);
- }
- void write_file_header_bits_rh(FILE *fptr, Huff_Node *root, unsigned char *bit_pos, unsigned char *cur_byte){
- if(root == NULL){
- printf("Error.");
- }
- Huff_Node *left = root->left;
- Huff_Node *right = root->right;
- if((left == NULL) && (right == NULL)){
- write_bit(fptr, 1, bit_pos, cur_byte);
- write_ascii_bits(fptr, root->ascii, bit_pos, cur_byte);
- return;
- }
- write_file_header_bits_rh(fptr, left, bit_pos, cur_byte);
- write_file_header_bits_rh(fptr, right, bit_pos, cur_byte);
- write_bit(fptr, 0, bit_pos, cur_byte);
- }
- void write_ascii_bits(FILE *fptr, char ascii, unsigned char *bit_pos, unsigned char *cur_byte){
- unsigned char mask = 0x40;
- while(mask > 0){
- write_bit(fptr, (ascii & mask) == mask, bit_pos, cur_byte);
- mask >>= 1;
- }
- }
- void map_codebook(int ** codebook, int size, int * map){
- int i = 0;
- int j = 0;
- for(i = 0; i < ASCII_SIZE; i++){
- map[i] = -1;
- for(j = 0; j < size; j++){
- if(codebook[j][0] == i){
- map[i] = j;
- }
- }
- }
- }
- void write_file_data(FILE *fptr_out, FILE *fptr_in, int ** codebook, int * map){
- char ch = 0;
- int ascii_i = 0;
- int code_i = 0;
- while((ch = fgetc(fptr_in)) != EOF){
- ascii_i = map[(int)ch];
- code_i = 1;
- while(codebook[ascii_i][code_i] != -1){
- fprintf(fptr_out, "%d", codebook[ascii_i][code_i]);
- code_i++;
- }
- }
- }
- char * read_file_data(FILE *fptr, Huff_Node *root){
- ssize_t read = 0;
- //int ch = 0;
- char * line = NULL;
- int linec = 1;
- size_t len = 0;
- while ((read = getline(&line, &len, fptr)) != -1) {
- //printf("Retrieved line of length %zu :\n", read);
- if(linec > 1){
- return line;
- }
- linec++;
- }
- return line;
- }
- void write_encoded_message(char * data, Huff_Node *root, FILE *fptr){
- Huff_Node *cur = root;
- char ch = ' ';
- int i = 0;
- while(ch != '\0'){
- if(cur->ascii >= 0){
- fprintf(fptr, "%c", cur->ascii);
- cur = root;
- }
- ch = data[i];
- if(ch == '0'){
- cur = cur->left;
- }
- if(ch == '1'){
- cur = cur->right;
- }
- i++;
- }
- if(root == NULL){
- return;
- }
- }
- int main (int argc, char **argv){
- if(argc != 2){
- printf("Invalid number of arguments.\n");
- return EXIT_FAILURE;
- }
-
- char input[256];
- strcpy(input, argv[1]);
- strcat(argv[1], ".unhuff");
- FILE *fptr = fopen(input, "r");
- Huff_Node *root = read_file_header(fptr);
- rewind(fptr);
- char * data = read_file_data(fptr, root);
- FILE *out = fopen(argv[1], "w");
- write_encoded_message(data, root, out);
- fclose(fptr);
- fclose(out);
- return EXIT_SUCCESS;
- }
|