출처 : http://blog.naver.com/ksw7998/100011413622
사용자로부터 주어진 텍스트 파일을 압축하고 반대로 압축된 파일은 압축을 푸는 프로그램으로호프만 코드 알고리즘을 이용하여 호프만 테이블을 작성한다.
작성된 호프만 테이블로 입력된 텍스트 파일을 하나의 버퍼에 저장 후 파일에 출력 파일에 출력할 때 호프만 코드를 같이 출력한다.
파일 압축을 풀 때 하나의 버퍼에 파일의 내용을 저장 후 현재 프로그램의 버전과의 일치 여부와 코드 파일의 내용을 비교하면서 압축을 푼다.
참고로,2Mbyte파일 압축시75%정도 나온다.
// 사용 방법
압축 할때
실행파일명 -e[E] text.txt output.txt
압축 풀때
실행파일명 -d[D] output.txt text.txt
////////////////////// 소스 코드 ////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define VERSION1.0
#define MAX_SIZE 256
#define CHARBITS 8
#define MAX_CODE_LENGTH16
#define MAX_CODE_NUMBER256
#define MAX_STRING_LENGTH 1024*100 // maximum size of compressed file is 100 kbyte
#define MAX_BIT_MASK_NUMBER 8
#define RIGHTBITS(n, x) ((x) & ((1U << (n)) - 1U))
typedef struct code_tag
{
int ascii;
char code[MAX_CODE_LENGTH];
int lengthOfCode;
unsigned frequency;
} CODE;
int HuffmanCodeTable(CODE codeTable[]);
void SetCode(CODE codeTable[], int cnt);
int EncryptString(char original_string[], int length_Of_Original_string,
CODE codeTable[], int cnt_of_Codes, char encoded_String[]);
void WriteBitString(char encoded_String[], int cnt_Of_EncodedBits);
int DecryptBitString(char encoded_String[], int cnt_Of_EncodeBit,
CODE codeTable[], int cnt_Of_Codes, char decoded_String[]);
int SearchAlphabet(char key, CODE codeTable[], int cnt_Of_Codes);
void AttachCode(char encoded_String[], int cnt_Of_Bits, char code[], int lengthOfCode);
int SearchCode(char key[], CODE codeTable[], int cnt_Of_Codes);
void WriteInFile(char encoded_String[], int cnt_Of_EncodedBits, CODE codeTable[], int cnt_of_codes);
void PrnFreq(CODE codeTable[], int cnt_of_codes);
void Error(char *msg);
void DownHeap(int i);
void GetTableSizeAndCodeBits(int *size_of_table, int *cnt_Of_EncodeBit);
int GetVersion();
void GetCodeTable(CODE codeTable[], int cnt_of_codes);
void GetCode(char encoded_string[]);
void PrnCodeTable(CODE codeTable[], int cnt_of_codes);
void WriteDecodedBit(char decoded_String[]);
char bitMask_OR[MAX_BIT_MASK_NUMBER];
char bitMask_AND[MAX_BIT_MASK_NUMBER];
int heap_size, heap[2*MAX_SIZE-1], parent[2*MAX_SIZE-1], left[2*MAX_SIZE-1], right[2*MAX_SIZE-1];
unsigned long int frequency[2*MAX_SIZE-1];
FILE *ifp, *ofp;
int main(int argc, char *argv[])
{
int i, j = 0;
char Original_String[MAX_STRING_LENGTH];
char encoded_String[MAX_STRING_LENGTH];
char decoded_String[MAX_STRING_LENGTH];
int lengthOfDecodedString;
int cnt_Of_EncodedBits = 0;
CODE codeTable[MAX_CODE_NUMBER];
int cnt_Of_Codes = 0;
char option;
if(argc != 4 || ((option = *argv[1]) != 'e' && option != 'E'
&& option != 'd' && option != 'D'))
{
printf("Usage : %s [e or E|d or D] input_file output_file\n", argv[0]);
exit(1);
}
ifp = fopen(argv[2], "rb");
if(ifp == NULL)
{
printf("Cannot open file : %s", argv[2]);
exit(1);
}
ofp = fopen(argv[3], "wb");
if(ofp == NULL)
{
printf("Cannot open file : %s", argv[3]);
exit(1);
}
for(i=0; i < 8; i++)
{
bitMask_OR[i] = 1 << (7-i);
bitMask_AND[i] = bitMask_OR[i] ^ 0xff;
}
if(option == 'e' || option == 'E')
{
while((i = getc(ifp)) != EOF)
{
Original_String[j++] = i;
if(j >= MAX_STRING_LENGTH)
Error("Error : larger than 100kbyte!\n");
}
Original_String[j]='\0';
rewind(ifp);
cnt_Of_Codes = HuffmanCodeTable(codeTable);
cnt_Of_EncodedBits = EncryptString(Original_String, strlen(Original_String), codeTable, cnt_Of_Codes, encoded_String);
WriteInFile(encoded_String, cnt_Of_EncodedBits, codeTable, cnt_Of_Codes);
}
else
{
if(GetVersion())
Error("Error : The file isn't equivalent the current program
version!\n");
GetTableSizeAndCodeBits(&cnt_Of_Codes, &cnt_Of_EncodedBits);
GetCodeTable(codeTable, cnt_Of_Codes);
GetCode(encoded_String);
lengthOfDecodedString = DecryptBitString(encoded_String, cnt_Of_EncodedBits, codeTable, cnt_Of_Codes, decoded_String);
WriteDecodedBit(decoded_String);
}
fclose(ifp);
fclose(ofp);
return 0;
}
int GetVersion()
{
int i, j = 0;
char buffer[10];
while((i = getc(ifp)) != EOF && j < 3)
{
buffer[j++] = i;
if(j >= MAX_STRING_LENGTH)
Error("Error : larger than 100kbyte!\n");
}
if(atof(buffer) == VERSION)
return 0;
else
return -1;
}
/*************************************************************
* 호프만 테이블 사이즈와 비트의 수 얻기
*
*************************************************************/
void GetTableSizeAndCodeBits(int *size_of_table, int *cnt_Of_EncodeBit)
{
int ch, i = 0;
char buffer[10];
while((ch = getc(ifp)) != EOF && ch != 0x2F)
buffer[i++] = ch;
buffer[i] = '\0';
*size_of_table = atoi(buffer);
i = 0;
while((ch = getc(ifp)) != EOF && ch != 0x20)
buffer[i++] = ch;
*cnt_Of_EncodeBit = atoi(buffer);
return;
}
/*************************************************************
* 호프만 코드 테이블을 메모리에 저장
*
*************************************************************/
void GetCodeTable(CODE codeTable[], int cnt_of_codes)
{
int i = 0, j = 0, tmp, is_block = 0;
while((tmp = getc(ifp)) != EOF || i < cnt_of_codes)
if(!is_block && tmp != '0' && tmp != '1')
{
codeTable[i].ascii = tmp;
}
else if(tmp == '0' || tmp == '1')
{
is_block = 1;
codeTable[i].code[j++] = tmp;
continue;
}
else if(is_block && tmp != '0' && tmp != '1')
{
codeTable[i].code[j] = '\0';
is_block = 0, j = 0;
if(cnt_of_codes == ++i)
break;
codeTable[i].ascii = tmp;
}
}
PrnCodeTable(codeTable, cnt_of_codes);
}
/*************************************************************
* 호프만 코드로 작성된 내용을 버퍼에 저장
*
*************************************************************/
void GetCode(char encoded_string[])
{
int ch, i = 0;
while((ch = getc(ifp)) != EOF)
{
encoded_string[i++] = ch;
if(i >= MAX_STRING_LENGTH)
Error("Error : larger than 100kbyte!\n");
}
encoded_string[i] = '\0';
}
void Error(char *msg)
{
printf("%s", msg);
exit(1);
}
void PrnFreq(CODE codeTable[], int cnt_of_codes)
{
int i, k = 0, size = 0;
printf("텍스트 분석 중...\n");
printf("---------------------------------\n");
printf("alphabet\tfrequency\n");
printf("---------------------------------\n");
for(i = 0; i <= cnt_of_codes; i++)
{
if((codeTable[i].ascii >= 65 && codeTable[i].ascii <= 90)
|| (codeTable[i].ascii >= 97 && codeTable[i].ascii <= 122))
{
printf("%c\t\t%d\n", codeTable[i].ascii, codeTable[i].frequency);
size += codeTable[i].frequency;
}
}
printf("---------------------------------\n");
printf("텍스트의 크기 : %d byte\n", size);
printf("---------------------------------\n\n\n");
}
/*************************************************************
* 버퍼에 저장된 호프만 코드를 사용 빈도수에 맞게 버퍼에 저장
*
*************************************************************/
void DownHeap(int i)
{
int j, k;
while ((j = 2 * i) <= heap_size)
{
if (j < heap_size && frequency[heap[j]] > frequency[heap[j + 1]])
j++;
if (frequency[k] <= frequency[heap[j]])
break;
heap[i] = heap[j];
i = j;
}
heap[i] = k;
}
/*************************************************************
* 호프만 알고리즘을 이용한 코드 테이블 작성
*
*************************************************************/
int HuffmanCodeTable(CODE codeTable[])
{
int i, j, k, avail, cnt_of_codes;
unsigned long int incount;
for(i = 0; i < MAX_SIZE; i++)
frequency[i] = 0;
while ((i = getc(ifp)) != EOF)
frequency[i]++;
heap[i] = 0;
heap_size = 0;
for(i = 0; i < MAX_SIZE; i++)
{
if(frequency[i] != 0)
heap[++heap_size] = i;
for(i = 1; i <= heap_size; i++)
{
codeTable[i-1].ascii = heap[i];
codeTable[i-1].frequency = frequency[heap[i]];
}
cnt_of_codes = heap_size;
PrnFreq(codeTable, cnt_of_codes);
for(i = heap_size / 2; i >= 1; i--)
DownHeap(i);
for(i = 0; i < 2 * MAX_SIZE - 1; i++)
parent[i] = 0;
k = heap[1];
avail = MAX_SIZE;
while (heap_size > 1)
{
i = heap[1];
heap[1] = heap[heap_size--];
DownHeap(1);
j = heap[1];
k = avail++;
frequency[k] = frequency[i] + frequency[j];
heap[1] = k;
DownHeap(1);
parent[i] = k;
parent[j] = -k;
left[k] = i;
right[k] = j;
}
incount = 0;
rewind(ifp);
SetCode(codeTable, cnt_of_codes);
return cnt_of_codes;
}
/*************************************************************
* 아스키 코드에 대한 코드 작성
*
*************************************************************/
void SetCode(CODE codeTable[], int cnt_of_codes)
{
int i, j, k, val = 0;
char temp[MAX_SIZE];
for(i = 0, j = 0, k = 0; i < cnt_of_codes; i++, j = 0, k = 0)
{
val = codeTable[i].ascii;
while ((val = parent[val]) != 0)
{
if (val > 0)
temp[j++] = '0';
else
{
temp[j++] = '1';
val = -val;
}
while(--j >= 0)
{
codeTable[i].code[k] = temp[j];
k++;
}
codeTable[i].code[k] = '\0';
codeTable[i].lengthOfCode = strlen(codeTable[i].code);
}
PrnCodeTable(codeTable, cnt_of_codes);
}
void PrnCodeTable(CODE codeTable[], int cnt_of_codes)
{
int i;
printf("Huffman code 생성 중...\n");
printf("---------------------------------\n");
printf("alphabet\tcode\n");
printf("---------------------------------\n");
for(i = 0; i < cnt_of_codes; i++)
if((codeTable[i].ascii >= 65 && codeTable[i].ascii <= 90) || (codeTable[i].ascii >= 97 && codeTable[i].ascii <= 122))
printf("%c\t\t%s\n", codeTable[i].ascii, codeTable[i].code);
printf("---------------------------------\n");
}
/*************************************************************
* 호프만 코드로 문자열 압축
*
*************************************************************/
int EncryptString(char original_String[], int length_Of_Original_String,
CODE codeTable[], int cnt_Of_Codes, char encoded_String[])
{
int i, pos, cnt_Of_Bits = 0;
for(i = 0; i < length_Of_Original_String; i++)
{
pos = SearchAlphabet(original_String[i], codeTable, cnt_Of_Codes);
if(pos >= 0)
{
AttachCode(encoded_String, cnt_Of_Bits, codeTable[pos].code, codeTable[pos].lengthOfCode);
cnt_Of_Bits += codeTable[pos].lengthOfCode;
}
else
{
printf("%c does not exist in code table!!\n", original_String[i]);
return -1;
}
}
return cnt_Of_Bits;
}
void WriteBitString(char encoded_String[], int cnt_Of_EncodedBits)
{
int i, pos = 0;
char a;
for(i=0; i < cnt_Of_EncodedBits; i++)
{
if(i%8==0)
{
putchar(' ');
a = encoded_String[pos++];
}
putchar(((a & bitMask_OR[0]) == 0) ? '0' : '1');
a <<= 1;
}
putchar('\n');
}
void WriteDecodedBit(char decoded_String[])
{
int i, j, length, val, tmp = 0, diff;
double progress = 0.0;
printf("압축 푸는 중 ...\n");
length = (int)strlen(decoded_String);
for(i =0; i < length; i++)
{
progress = (double)(i+1) / (double)length * 100.0;
if((int)progress%10 == 0 && (int)progress != 0 && (int)progress != 100)
{
if((val = (int)progress) != tmp)
{
if((diff = (val - tmp) / 10) > 1)
for(j = 1; j < diff; j++)
printf("%d %% 압축 품\n", tmp + j * 10);
printf("%d %% 압축 품\n", val);
tmp = val;
}
}
else if((int)progress/10 && (int)progress%10 > 5)
{
if((val = ((int)progress/10)*10) != tmp)
{
if((diff = (val - tmp) / 10) > 1)
for(j = 1; j < diff; j++)
printf("%d %% 압축 품\n", tmp + j * 10);
printf("%d %% 압축 품\n", val);
tmp = val;
}
}
else if((val = ((int)progress/10)*10) != tmp && (int)progress != 100)
{
if((diff = (val - tmp) / 10) > 1)
{
for(j = 1; j < diff; j++)
printf("%d %% 압축 품\n", tmp + j * 10);
}
printf("%d %% 압축 품\n", val);
tmp = val;
}
putc(decoded_String[i], ofp);
}
printf("완료.\n");
}
void WriteInFile(char encoded_String[], int cnt_Of_EncodedBits, CODE codeTable[],
int cnt_of_codes)
{
int i, j, ch = 0, incount, outcount, pos = 0, val, tmp = 0, diff;
double version = VERSION, progress = 0.0;
char *p, buffer[MAX_STRING_LENGTH];
printf("압축 중 ...\n");
fseek(ifp, 0L, SEEK_END);
incount = ftell(ifp);
// write a version
sprintf(buffer, "%.1f ", version);
for(i =0; i < (int)strlen(buffer); i++)
putc(buffer[i], ofp);
// write a size of the code tables
sprintf(buffer, "%d", cnt_of_codes);
for(i =0; i < (int)strlen(buffer); i++)
putc(buffer[i], ofp);
sprintf(buffer, "/%d ", cnt_Of_EncodedBits);
for(i =0; i < (int)strlen(buffer); i++)
putc(buffer[i], ofp);
// write contents of the code tables
for(i = 0; i < cnt_of_codes; i++)
{
putc(codeTable[i].ascii, ofp);
for(j = 0; j < codeTable[i].lengthOfCode; j++)
putc(codeTable[i].code[j], ofp);
}
putc(' ', ofp);
// write contents of compressed file
p = encoded_String;
for(i=0; (i < cnt_Of_EncodedBits) && (ch != EOF) && (*p != '\0'); i++)
{
progress = (double)i / (double)cnt_Of_EncodedBits * 100.0;
if((int)progress%10 == 0 && (int)progress != 0 && (int)progress != 100)
{
if((val = (int)progress) != tmp)
{
if((diff = (val - tmp) / 10) > 1)
{
for(j = 1; j < diff; j++)
printf("%d %% 압축\n", tmp + j * 10);
}
printf("%d %% 압축\n", val);
tmp = val;
}
}
else if((int)progress/10 && (int)progress%10 > 5)
{
if((val = ((int)progress/10)*10) != tmp)
{
if((diff = (val - tmp) / 10) > 1)
{
for(j = 1; j < diff; j++)
printf("%d %% 압축\n", tmp + j * 10);
}
printf("%d %% 압축\n", val);
tmp = val;
}
}
else if((val = ((int)progress/10)*10) != tmp && (int)progress != 100)
{
if((diff = (val - tmp) / 10) > 1)
{
for(j = 1; j < diff; j++)
printf("%d %% 압축\n", tmp + j * 10);
}
printf("%d %% 압축\n", val);
tmp = val;
}
if(i%8==0)
ch = putc( p[pos++], ofp );
}
printf("압축 완료.\n");
outcount = ftell(ofp);
printf("---------------------------------\n");
printf("텍스트 파일 크기 : %u bytes\n", incount);
printf("압축된 파일 크기 : %u bytes\n", outcount);
printf("압축 비율 : %.3f(%u/%u - 1)\n", ((double)incount / (double)outcount - 1.0)
, incount, outcount);
}
/*************************************************************
* 호프만 코드로 문자열 압축 풀기
*
*************************************************************/
int DecryptBitString(char encoded_String[], int cnt_Of_EncodeBit,
CODE codeTable[], int cnt_Of_Codes, char decoded_String[])
{
int i, index, pos=0, length_Of_DecodedString=0;
char code[MAX_CODE_LENGTH] = {'\0'};
char a;
for (i = 0; i < cnt_Of_EncodeBit; i++)
{
if(i%8 ==0)
a=encoded_String[pos++];
if(a & bitMask_OR[0])
strcat(code, "1");
else
strcat(code, "0");
a <<= 1;
index=SearchCode(code, codeTable, cnt_Of_Codes);
if(index != -1)
{
decoded_String[length_Of_DecodedString] = codeTable[index].ascii;
length_Of_DecodedString++;
code[0] = '\0';
}
}
decoded_String[length_Of_DecodedString] = '\0';
return length_Of_DecodedString;
}
/*************************************************************
* 호프만 테이블 내의 주어진 알파벳 문자의 인덱스 찾기
*
*************************************************************/
int SearchAlphabet(char key, CODE codeTable[], int cnt_Of_Codes)
{
int i, index = -1;
for (i = 0; i <cnt_Of_Codes; i++)
{
if(codeTable[i].ascii == key)
{
index=i;
break;
}
}
return index;
}
/*************************************************************
* 호프만 테이블 내의 주어진 코드의 인덱스 찾기
*
*************************************************************/
int SearchCode(char key[], CODE codeTable[], int cnt_Of_Codes)
{
int i, index=-1;
for (i = 0; i < cnt_Of_Codes; i++)
{
if(strcmp(codeTable[i].code, key) == 0)
{
index=i;
break;
}
}
return index;
}
/*************************************************************
*하나의 버퍼에 호프만 코드 저장
*
*************************************************************/
void AttachCode(char encoded_String[], int cnt_Of_Bits, char code[],
int lengthOfCode)
{
int i, lengthOfEncodedString, lastBitPosition;
lengthOfEncodedString = cnt_Of_Bits/8;
lastBitPosition = cnt_Of_Bits%8;
for (i = 0; i < lengthOfCode; i++)
{
if(code[i]=='1')
encoded_String[lengthOfEncodedString] |= bitMask_OR[lastBitPosition];
else
encoded_String[lengthOfEncodedString] &= bitMask_AND[lastBitPosition];
lastBitPosition++;
if(lastBitPosition == 8)
{
lastBitPosition = 0;
lengthOfEncodedString = lengthOfEncodedString + 1;
}
}
}
'UNIX_LINUX_C_C++' 카테고리의 다른 글
[펌] LZ 압축 알고리즘 (0) | 2011.10.16 |
---|---|
[펌] ZIP 알고리즘 (0) | 2011.10.16 |
[펌] 압축 알고리즘 소스 및 정리 (0) | 2011.10.16 |
[펌] SLZ 압축 소스 (0) | 2011.10.16 |
[펌] 공유 메모리(shared memory) 의 사용 (0) | 2011.10.14 |