なんかできた。

圧縮プログラムenc、伸張プログラムdec。

% cc enc.c -o enc
% cc dec.c -o dec
% cat hoge
abc abbc abc cde cde cdeeacd
cabe abe cafe age fage
aaaabbbbcccc abc abc abcc abc
fbcd fbcd fcbd ddcb dcb
% ./enc hoge
% ll hoge*
-rw-r--r--    1 sun31048 J03A          106 Nov 19 11:01 hoge
-rw-r--r--    1 sun31048 J03A           91 Nov 19 11:02 hoge.huf
% rm -f hoge
% ./dec hoge.huf
% ll hoge*
-rw-r--r--    1 sun31048 J03A          106 Nov 19 11:03 hoge
-rw-r--r--    1 sun31048 J03A           91 Nov 19 11:02 hoge.huf
% cat hoge
abc abbc abc cde cde cdeeacd
cabe abe cafe age fage
aaaabbbbcccc abc abc abcc abc
fbcd fbcd fcbd ddcb dcb

というわけで、おーなんかできてるくせー。辞書部分がでかくなるので、あんまり縮みませんね。

% ll jpn.txt*
-rw-r--r--    1 sun31048 J03A       231295 Nov 19 11:06 jpn.txt
-rw-r--r--    1 sun31048 J03A       149476 Nov 19 11:06 jpn.txt.huf

日本語テキストだったらこの程度になった。辞書をどうやって格納するかって難しいことだなあ。めんどくさかったので、出現頻度表をそのままぶちこんだ。以下比較。

% compress -c jpn.txt >jpn.txt.Z
% gzip -c jpn.txt >jpn.txt.gz
% bzip2 -c jpn.txt >jpn.txt.bz2
% ll jpn.txt.*
-rw-r--r--    1 sun31048 J03A       117761 Nov 19 11:19 jpn.txt.Z
-rw-r--r--    1 sun31048 J03A        87057 Nov 19 11:19 jpn.txt.bz2
-rw-r--r--    1 sun31048 J03A       104777 Nov 19 11:19 jpn.txt.gz
-rw-r--r--    1 sun31048 J03A       149476 Nov 19 11:06 jpn.txt.huf

まあ当然だけど負けまくり。

なんかビット演算に慣れればそんなに難しくなかったのかもしれない。
以下ソースとか。
気紛れでファイル名関数名変数名は変えまくりですが。

圧縮プログラムenc.c

#include <stdio.h>
#include <string.h>

#define HCODEMAX 256

typedef struct{
  unsigned char c;
  int count;
} item;
struct node{
  item data;
  struct node *lbranch, *rbranch;
};

struct node** isort_count(struct node *ctable[], int length){
  int i, j;
  struct node *tmp;
  for(i=1;i<length;++i){
    tmp = ctable[i];
    for(j=i;j>0 && ctable[j-1]->data.count>tmp->data.count;--j)
      ctable[j] = ctable[j-1];
    ctable[j] = tmp;
  }
  return ctable;
}
struct node* mknode(struct node *lbranch, struct node *rbranch){
  struct node *n;
  n = (struct node *)malloc(sizeof(struct node));
  n->data.count = 0;
  n->lbranch = lbranch;
  n->rbranch = rbranch;
  return n;
}
struct node* mktree(struct node *ctable[], int length){
  struct node *root, *n;

  if(length == 1) return ctable[0];
  n = mknode(ctable[0], ctable[1]);
  n->data.count = ctable[0]->data.count + ctable[1]->data.count;
  ctable[1] = n;
  isort_count(&ctable[1], length-1);
  root = mktree(&ctable[1], length-1);

  return root;
}
int print_ctable(FILE *fp, struct node *ctable[]){
  int i;
  for(i=0;i<256;++i){
    if(ctable[i]->data.count > 0){
      fwrite(&(ctable[i]->data.c), 1, 1, fp);
      fwrite(&(ctable[i]->data.count), 4, 1, fp);
    }
  }
}
int mkhtable(char *htable[], struct node *root, char *precode){
  char lcode[HCODEMAX], rcode[HCODEMAX];
  strcpy(lcode, precode);
  strcpy(rcode, precode);

  if(root->lbranch != NULL)
    mkhtable(htable, root->lbranch, strcat(lcode, "0"));
  if(root->rbranch != NULL)
    mkhtable(htable, root->rbranch, strcat(rcode, "1"));
  if(root->lbranch == NULL && root->rbranch == NULL){
    htable[root->data.c] = (char *)malloc(sizeof(char)*HCODEMAX);
    strcpy(htable[root->data.c], precode);
  }

  return 1;
}
int putbits(char *bitstream, FILE *fp){
  static int length = 0;
  static unsigned char bits = 0;
  if(bitstream == NULL){
    bits = bits << 8 - length;
    fwrite(&bits, 1, 1, fp);
    fwrite(&length, 1, 1, fp);
  } else {
    while(*bitstream != '\0'){
      bits = (bits << 1) | (*bitstream - '0');
      ++length;
      if(length == 8){
        fwrite(&bits, 1, 1, fp);
        length = 0;
        bits = 0;
      }
      ++bitstream;
    }
  }
  return 1;
}
int write_archive(FILE *arch, FILE *plain, char *htable[]){
  int c;
  while((c = fgetc(plain)) != EOF) putbits(htable[c], arch);
  putbits(NULL, arch);
  return 1;
}
int encode(char *plainname){
  char archname[256];
  int i;
  FILE *plain, *arch;
  struct node *ctable[256], *root;
  char *htable[256];

  strcpy(archname, plainname);
  strcat(archname, ".huf");

  if((plain = fopen(plainname, "r")) == NULL){
    perror("cannot open plain");
    return -1;
  }
  if((arch = fopen(archname, "w")) == NULL){
    perror("cannot open arch");
    return -1;
  }

  for(i=0;i<256;++i){
    ctable[i] = mknode(NULL, NULL);
    ctable[i]->data.count = 0;
    ctable[i]->data.c = i;
  }
  while((i=fgetc(plain)) != EOF) ++ctable[i]->data.count;

  print_ctable(arch, ctable);

  isort_count(ctable, 256);
  for(i=0;ctable[i]->data.count == 0;++i);
  root = mktree(&ctable[i], 256-i);

  for(i=0;i<256;++i) htable[i] = NULL;
  mkhtable(htable, root, "");

  fseek(plain, 0, SEEK_SET);

  i = 0;
  fwrite(&i, 1, 1, arch);
  fwrite(&i, 4, 1, arch);
  write_archive(arch, plain, htable);

  fclose(plain);
  fclose(arch);
  return 1;
}
int main(int argc, char *argv[]){
  if(argc>1)
    if(!encode(argv[argc-1]))
      puts("cannot encode");
  return 0;
}

dec.c

#include <stdio.h>
#include <string.h>

#define HCODEMAX 256

typedef struct{
  unsigned char c;
  int count;
} item;
struct node{
  item data;
  struct node *lbranch, *rbranch;
};

struct node** isort_count(struct node *ctable[], int length){
  int i, j;
  struct node *tmp;
  for(i=1;i<length;++i){
    tmp = ctable[i];
    for(j=i;j>0 && ctable[j-1]->data.count>tmp->data.count;--j)
      ctable[j] = ctable[j-1];
    ctable[j] = tmp;
  }
  return ctable;
}
struct node* mknode(struct node *lbranch, struct node *rbranch){
  struct node *n;
  n = (struct node *)malloc(sizeof(struct node));
  n->data.count = 0;
  n->lbranch = lbranch;
  n->rbranch = rbranch;
  return n;
}
struct node* mktree(struct node *ctable[], int length){
  struct node *root, *n;

  if(length == 1) return ctable[0];
  n = mknode(ctable[0], ctable[1]);
  n->data.count = ctable[0]->data.count + ctable[1]->data.count;
  ctable[1] = n;
  isort_count(&ctable[1], length-1);
  root = mktree(&ctable[1], length-1);

  return root;
}
char getbits(int data[], struct node *root, FILE *plain){
  static struct node *n = NULL;
  int i, length;
  length = 0;
  if(n == NULL) n = root;
  for(i=128;i>0;i=i>>1){
    if(data[2] == EOF && data[1] == length) return 1;
    if((i & data[0]) == 0){
      n = n->lbranch;
    } else {
      n = n->rbranch;
    }
    if(n->lbranch == NULL && n->rbranch == NULL){
      fputc(n->data.c, plain);
      n = root;
    }
    ++length;
  }
  return 1;
}
int read_archive(FILE *arch, FILE *plain, struct node *root){
  int i, data[3];

  data[0] = fgetc(arch);
  data[1] = fgetc(arch);
  while((data[2] = fgetc(arch)) != EOF){
    getbits(data, root, plain);
    data[0] = data[1];
    data[1] = data[2];
  }
  getbits(data, root, plain);

  return 1;
}
int get_ctable(struct node *ctable[], FILE *arch){
  int i, c;
  unsigned int count;
  i = 0;
  while(1){
    fread(&c, 1, 1, arch);
    fread(&count, 4, 1, arch);
    if(c==0 && count==0) return i;
    ctable[i] = mknode(NULL, NULL);
    ctable[i]->data.c = c;
    ctable[i]->data.count = count;
    ++i;
  }
  return 0;
}
int decode(char *archname){
  char plainname[256], *cp;
  int i, ctable_len;
  FILE *plain, *arch;
  struct node *ctable[256], *root;

  strcpy(plainname, archname);
  cp = strstr(plainname, ".huf");
  *cp = '\0';

  if((arch = fopen(archname, "r")) == NULL){
    perror("cannot open arch");
    return -1;
  }
  if((plain = fopen(plainname, "w")) == NULL){
    perror("cannot open plain");
    return -1;
  }

  ctable_len = get_ctable(ctable, arch);

  isort_count(ctable, ctable_len);
  root = mktree(ctable, ctable_len);

  read_archive(arch, plain, root);

  fclose(plain);
  fclose(arch);
  return 1;
}
int main(int argc, char *argv[]){
  if(argc>1)
    if(!decode(argv[argc-1]))
      puts("cannot decode");
  return 0;
}

ちょっとしたことですぐにゲロ吐いて死ぬプログラムです。完成度としては「条件さえ整えれば、ちゃんと動作することもある」といったところ。

test