我已经创建了一个框架,用来解析大小适中、可以完整放入内存(RAM)的文本文件,目前一切顺利。我没有什么可抱怨的,但如果遇到必须处理大文件的情况,比如大于 8GB(这正是我的内存容量),该怎么办?
处理这些大文件的有效方法是什么?
处理这些大文件的有效方法是什么?
我的框架:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int Parse(const char *filename, const char *outputfile);

/**
 * Program entry point: runs Parse() on "file.txt" (rewriting it in place),
 * reports the CPU time consumed on stderr and waits for a key press.
 */
int main(void)
{
    clock_t t1 = clock();
    /* ............................................................................................................................. */
    Parse("file.txt", NULL);
    /* ............................................................................................................................. */
    clock_t t2 = clock();

    fprintf(stderr, "time elapsed: %.4f\n", (double)(t2 - t1) / CLOCKS_PER_SEC);
    fprintf(stderr, "Press any key to continue . . . ");
    getchar();
    return 0;
}

/**
 * Returns the size of an open stream as reported by ftell() at end-of-file
 * and rewinds the stream to its beginning.
 *
 * @param fp  open, seekable stream
 * @return    the size, or -1 if seeking or ftell() fails
 *
 * The result is a long, so sizes above LONG_MAX cannot be represented
 * (2 GiB on platforms where long is 32 bits wide).
 */
long GetFileSize(FILE *fp)
{
    if (fseek(fp, 0L, SEEK_END) != 0) {
        return -1L;
    }

    long f_size = ftell(fp);

    /* fseek() takes (stream, offset, whence); the offset must not be omitted */
    if (fseek(fp, 0L, SEEK_SET) != 0) {
        return -1L;
    }
    return f_size;
}

/**
 * Reads up to f_size characters from fp into a freshly allocated,
 * NUL-terminated buffer.
 *
 * @param fp      stream positioned where reading should start
 * @param f_size  maximum number of characters to read
 * @return        heap buffer owned by the caller (release with free()),
 *                or NULL if the allocation fails
 *
 * Reading stops at end-of-file or on a read error; whatever was read up to
 * that point is returned.
 */
char *dump_file_to_array(FILE *fp, size_t f_size)
{
    /* f_size + 1 would wrap to 0 and yield a useless zero-byte buffer */
    if (f_size == (size_t)-1) {
        return NULL;
    }

    char *buf = calloc(f_size + 1, 1);
    if (buf) {
        size_t n = 0;

        /* every read is bounded by the space that is actually left */
        while (n < f_size) {
            size_t got = fread(buf + n, 1, f_size - n, fp);
            if (got == 0) {
                break;
            }
            n += got;
        }
        buf[n] = '\0';
    }
    return buf;
}

/**
 * Loads a whole text file into memory, runs the (still empty) state-machine
 * loop over it and writes the resulting buffer back to disk.
 *
 * @param filename    file to read
 * @param outputfile  file to write; when NULL, filename is overwritten
 * @return 0 on success,
 *         1 if the input cannot be opened or its size cannot be determined,
 *         2 if the buffer cannot be allocated,
 *         3 if the output cannot be opened or written
 */
int Parse(const char *filename, const char *outputfile)
{
    /* open file for reading in text mode */
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        perror(filename);
        return 1;
    }

    /* a negative size must not be converted to size_t: it would become huge */
    long size = GetFileSize(fp);
    if (size < 0) {
        perror(filename);
        fclose(fp);
        return 1;
    }

    /* store file in dynamic memory and close file */
    char *buf = dump_file_to_array(fp, (size_t)size);
    fclose(fp);
    if (!buf) {
        fputs("error: memory allocation Failed.\n", stderr);
        return 2;
    }

    /* state machine variables */
    // ........

    /* array index variables */
    size_t x = 0; /* read position */
    size_t y = 0; /* NOTE(review): presumably the write position advanced by the
                     switch cases below; it stays 0 until cases are added */

    /* main loop */
    while (buf[x]) {
        switch (buf[x]) {
        /* ... */
        default:
            break;
        }
        x++;
    }

    /* NUL-terminate array at y */
    buf[y] = '\0';

    /* write buffer to file and clean up */
    const char *target = outputfile ? outputfile : filename;
    int status = 0;

    fp = fopen(target, "w");
    if (!fp) {
        perror(target);
        status = 3;
    } else {
        if (fputs(buf, fp) == EOF) {
            perror(target);
            status = 3;
        }
        /* fclose() flushes, so a failure here also means lost output */
        if (fclose(fp) == EOF) {
            perror(target);
            status = 3;
        }
    }

    free(buf);
    return status;
}
/*
 * Removes every occurrence of pattern from the NUL-terminated string buf,
 * compacting the remaining text in place.
 * pattern_len must be strlen(pattern) and greater than zero.
 * Returns the number of occurrences removed.
 */
static size_t strip_pattern(char *buf, const char *pattern, size_t pattern_len)
{
    char *match = strstr(buf, pattern);
    char *dst = match; /* where the next kept character is written */
    char *src = match; /* start of the text not yet examined */
    size_t removed = 0;

    if (!match) {
        return 0;
    }

    while (match) {
        /* keep the text between the previous match and this one */
        size_t gap = (size_t)(match - src);
        memmove(dst, src, gap);
        dst += gap;

        src = match + pattern_len;
        removed++;
        match = strstr(src, pattern);
    }

    /* source and destination overlap here, so strcpy() would be undefined */
    memmove(dst, src, strlen(src) + 1);
    return removed;
}

/**
 * Deletes every occurrence of pattern from a text file.
 *
 * NOTE(review): the original text of this function was damaged (the
 * signature was fused with the fopen() call and the output section was
 * truncated). The third parameter and the writing code below were
 * reconstructed to mirror Parse() - confirm against the original source.
 *
 * @param filename    file to read
 * @param pattern     non-empty string to remove
 * @param outputfile  file to write; when NULL, filename is overwritten
 * @return  0 on success,
 *         -1 if pattern is empty or does not occur in the file,
 *          1 if the input cannot be opened or its size cannot be determined,
 *          2 if the buffer cannot be allocated,
 *          3 if the output cannot be opened or written
 */
int delete_pattern_in_file(const char *filename, const char *pattern, const char *outputfile)
{
    /* open file for reading in text mode */
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        perror(filename);
        return 1;
    }

    /* a negative size must not be converted to size_t: it would become huge */
    long size = GetFileSize(fp);
    if (size < 0) {
        perror(filename);
        fclose(fp);
        return 1;
    }

    /* copy file contents to buffer and close file */
    char *buf = dump_file_to_array(fp, (size_t)size);
    fclose(fp);
    if (!buf) {
        fputs("error - memory allocation Failed", stderr);
        return 2;
    }

    /* an empty pattern matches everywhere and would never let the scan advance */
    size_t pattern_len = strlen(pattern);
    if (pattern_len == 0 || strip_pattern(buf, pattern, pattern_len) == 0) {
        fputs("No match found.\n", stderr);
        free(buf);
        return -1;
    }

    /* open file for writing and print the processed buffer to it */
    const char *target = outputfile ? outputfile : filename;
    int status = 0;

    fp = fopen(target, "w");
    if (!fp) {
        perror(target);
        status = 3;
    } else {
        if (fputs(buf, fp) == EOF) {
            perror(target);
            status = 3;
        }
        /* fclose() flushes, so a failure here also means lost output */
        if (fclose(fp) == EOF) {
            perror(target);
            status = 3;
        }
    }

    free(buf);
    return status;
}