/*
 * (c) 2007 Steve Parker http://steve-parker.org/
 * GPLv2
 *
 * Attempt to retrieve the **TEXT** from a Microsoft Publisher document.
 * This will not deal with any formatting. I just want the text.
 *
 * It seems (April 2007) that there is no current tool to read MS Publisher (.pub)
 * files, other than MS Publisher itself, on any platform - Win, Mac, Linux, etc.
 * This tool certainly doesn't allow you to edit a MS Publisher file, but
 * it attempts to get some content out of it, in text form.
 * Due to a lack of documentation about the file format, and this being
 * an initial release after only an hour or so of reverse-engineering,
 * this tool gathers (hopefully) all of the text in the document, plus
 * other text which is not (or should not) be contained in the file.
 *
 * This tool has been written upon the basis of one PUB file (v3.0), and
 * the output of "od -c" on that file. It helps that I had some idea about
 * the content of the file. YMMV.
 *
 * Much of the output from this tool is useless; it errs on the side of caution.
 * It appears that there are flags within the PUB file to mark the start of
 * the content (and possibly also the end of the content). This warrants
 * further investigation.
 *
 * NOTE: the code in this particular file does not extract text. It compares
 * two files byte by byte and writes every differing byte to "diffs.html"
 * as an HTML table (see compare() below).
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Strings are of the form char \0 char \0 char \0 char \0 :
 *
 * 0550660  \0  \0  \0   A  \0   c  \0   t  \0   i  \0   v  \0
 * 0550700   i  \0   t  \0   y  \0      \0   b  \0   a  \0   g  \0   s  \0
 *
 * This says " Activity bags"
 */

// How large can an output file become?
#define MAXKB 10
// How often to repeat the headers
#define REPEATHEADER 1

/* Destination of the HTML report; opened and closed by compare(). */
FILE *outfile;

/*
 * Write an HTML "bgcolor" attribute to outfile, chosen by comparing two
 * byte values: white when equal, yellow when x is larger, lightblue when
 * x is smaller. The caller must already have written the opening "<td".
 */
void printcol(int x, int y)
{
    if (x == y) {
        fprintf(outfile, " bgcolor=white");
    } else if (x > y) {
        fprintf(outfile, " bgcolor=yellow");
    } else {
        fprintf(outfile, " bgcolor=lightblue");
    }
}

/*
 * Sometimes it is useful to skip some characters from one file, which are not present in the other.
 * When the first file has the extra content, we use "skipA".
 * When the second file has the extra content, we use "skipB".
 *
 * This allows us to see the stuff we have skipped over, without counting it as part of the diff.
 *
 * skipA will skip characters from the first file provided.
 * skipA will increment the count, so that the byte number of file "a" (FILE *a, int x) is consistent.
 * USAGE: skipA(&i, &x, 64, a);
 *        64 is the number of bytes to skip
 *
 * skipB does the equivalent for the second file. The count does not get incremented.
 * USAGE: skipB(i, &y, 2, b);
 *
 * Note the difference in pointers. skipA takes "i" as a pointer.
 */
void skipA(int *i, int *x, int s, FILE *a)
{
    int skip;

    printf("\n\n **** SKIPPING %d from %d\n\n", s, *i);
    for (skip = s; skip > 0; skip--) {
        *x = fgetc(a);
        *i = *i + 1;
    }
}

void skipB(int i, int *y, int s, FILE *b)
{
    int skip;

    printf("\n\n **** SKIPPING %d from %d\n\n", s, i);
    for (skip = s; skip > 0; skip--) {
        *y = fgetc(b);
    }
}

/* Write s to out, replacing the characters that are special in HTML. */
static void put_html_escaped(FILE *out, const char *s)
{
    for (; *s != '\0'; s++) {
        switch (*s) {
        case '<':
            fputs("&lt;", out);
            break;
        case '>':
            fputs("&gt;", out);
            break;
        case '&':
            fputs("&amp;", out);
            break;
        default:
            fputc(*s, out);
            break;
        }
    }
}

/*
 * Write the three table cells (decimal, hex, printable character) that
 * describe byte c of one file. "other" is the byte read from the other
 * file at the same offset and only selects the background colour.
 * A file that has already ended gets a single "EOF" cell spanning all three.
 */
static void print_byte_cells(int c, int other)
{
    if (c == EOF) {
        fprintf(outfile, "<td colspan=3>EOF</td>");
        return;
    }

    /* The third cell is left open so printcol() can add its attribute. */
    fprintf(outfile, "<td>%3d</td><td>%2x</td><td", c, c);
    printcol(c, other);

    if ((c > 31) && (c < 127)) {
        /* Printable ASCII: show it, escaped so '<', '>' and '&' stay text. */
        char text[2];

        text[0] = (char)c;
        text[1] = '\0';
        fprintf(outfile, ">'");
        put_html_escaped(outfile, text);
        fprintf(outfile, "'</td>");
    } else {
        fprintf(outfile, ">&nbsp;</td>");
    }
}

/*
 * Compare files f1 and f2 byte by byte and write the result to
 * "diffs.html" in the current directory.
 *
 * Each offset at which the files differ becomes one table row holding the
 * offset (decimal and octal) and, for each file, the byte in decimal, hex
 * and as a printable character. A run of identical bytes is collapsed into
 * a single blank separator row. A final row marks the offset at which both
 * files have ended.
 *
 * Prints an error and calls exit(1) if either input cannot be opened or
 * the report cannot be created.
 */
void compare(char *f1, char *f2)
{
    static const char filename[] = "diffs.html";
    FILE *a;
    FILE *b;
    int x, y;
    int i = 0;           /* current byte offset */
    int lastwasdiff = 1; /* 1 while inside a run of differing bytes */

    /* Binary mode: the inputs are arbitrary bytes, not text. */
    a = fopen(f1, "rb");
    b = fopen(f2, "rb");
    if ((!a) || (!b)) {
        printf("ERROR: Cannot open files.\n");
        exit(1);
    }

    outfile = fopen(filename, "w");
    if (!outfile) {
        printf("ERROR: Cannot create %s.\n", filename);
        exit(1);
    }
    printf("Creating %s\n", filename);

    fprintf(outfile, "<html>\n<body>\n<table border=1>\n");
    fprintf(outfile, "<tr><th>Dec</th><th>Oct</th><th colspan=3>");
    put_html_escaped(outfile, f1);
    fprintf(outfile, "</th><th colspan=3>");
    put_html_escaped(outfile, f2);
    fprintf(outfile, "</th></tr>\n\n");

    for (;;) {
        /*
        if (i==48740) { printf("\nSkipping - see %s\n", filename); skipA(&i, &x, 64, a); }
        if (i==58368) { printf("\nSkipping - see %s\n", filename); skipB(i, &y, 64, b); }
        */
        /*
         * (not for 12/)
        if (i==48740) { printf("\nSkipping - see %s\n", filename); skipA(&i, &x, 64, a); }
        if (i==56364) { printf("\nSkipping - see %s\n", filename); skipB(i, &y, 2, b); }
        if (i==58304) { printf("\nSkipping - see %s\n", filename); skipB(i,&y,62,b); }
        */
        x = fgetc(a);
        y = fgetc(b);

        /* Stop before counting the offset at which both files have ended. */
        if ((x == EOF) && (y == EOF)) {
            break;
        }

        if (x == y) {
            /* First identical byte after a run of differences: separator. */
            if (lastwasdiff == 1) {
                fprintf(outfile, "<tr><td colspan=8>&nbsp;</td></tr>\n");
            }
            lastwasdiff = 0;
        } else {
            lastwasdiff = 1;
            fprintf(outfile, "<tr><td>%3d</td><td>%3o</td>\n", i, i);
            print_byte_cells(x, y);
            print_byte_cells(y, x);
            fprintf(outfile, "</tr>");
        }
        i++;
    }

    /* Close off a trailing run of differences before the EOF row. */
    if (lastwasdiff == 1) {
        fprintf(outfile, "<tr><td colspan=8>&nbsp;</td></tr>\n");
    }

    fprintf(outfile,
            "<tr><td>%3d</td><td>%3o</td>"
            "<td colspan=3>EOF</td><td colspan=3>EOF</td></tr>\n",
            i, i);
    fprintf(outfile, "</table>\n</body>\n</html>\n");

    /* fclose() flushes, so a failure here means the report is incomplete. */
    if (fclose(outfile) != 0) {
        printf("ERROR: Cannot write %s.\n", filename);
    }
    outfile = NULL;
    fclose(a);
    fclose(b);
}

/*
 * Usage: <program> file1 file2
 * Returns 0 after writing the comparison, 1 when the arguments are wrong.
 */
int main(int argc, char *argv[])
{
    char *f1;
    char *f2;

    if (argc == 3) {
        f1 = argv[1];
        f2 = argv[2];
        compare(f1, f2);
    } else {
        fprintf(stderr, "Usage: %s file1 file2\n",
                (argc > 0 && argv[0]) ? argv[0] : "compare");
        return 1;
    }
    return 0;
}