/* jpgsalv.c vi:se ts=8 sw=8: */
/* jpgsalv--Jpeg salvager
 * Copyright (C) 2005 Peter Backes
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/* There are several things which make this program distinctive from
 * the various 'jpeg extractors' out there.  It is not written in an
 * obscure language such as java, it is fast and it is stateful in that
 * it actually understands the basic jpeg structures.  That is, instead
 * of simply looking for a start marker and copying all the data from
 * there until the end marker, it actually keeps some state.  Especially
 * it can handle some cases of bogus start markers and it understands
 * markers with parameters.  This way it is not fooled by parameter sections
 * of marker segments containing end markers, which happens for example
 * for embedded thumbnails.
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* We don't use threads, can use quick versions. */
#ifdef _IO_getc_unlocked
#undef getc
#define getc _IO_getc_unlocked
#endif
#ifdef _IO_putc_unlocked
#undef putc
#define putc _IO_putc_unlocked
#endif

/* Marker bytes the salvager compares against directly. */
enum {
	MARK_PREFIX = 0xff,	/* first byte of every marker */
	MARK_SOI = 0xd8		/* second byte of the start marker */
};

/* Automaton states.  The value of each state is the index in stab[]
 * at which the transition list of that state begins.
 */
enum {
	ST_SCAN = 0,	/* header hi: outside any jpeg, looking for 0xff */
	ST_SOI = 2,	/* header lo: 0xff seen, looking for 0xd8 */
	ST_FIRST = 5,	/* tag hi (first): 0xff 0xd8 seen, 0xff must follow */
	ST_TAG = 7,	/* tag lo: byte following a 0xff */
	ST_LEN_HI = 20,	/* len hi: high byte of a segment length */
	ST_LEN_LO = 21,	/* len lo: low byte of a segment length */
	ST_PARAM = 22,	/* parameters: inside a segment's parameter bytes */
	ST_BODY = 23,	/* body: bytes copied until the next 0xff */
	ST_END = 25	/* end: 0xff 0xd9 seen; has no row in stab[] */
};

/* DFA for scanning for jpeg data in a file.
 *
 * A state's transitions are the consecutive entries starting at the
 * state's index; the list is searched in order and ends with an entry
 * whose symbol is EOF, which matches any input byte.  The switch from
 * ST_PARAM to ST_BODY is not in the table: salvag() makes it once the
 * segment's byte count has run out.
 */
static const struct {
	int	symb,	/* symbol, EOF signals default */
		stat;	/* state to go to */
} stab[] = {
/*  0 */	{0xff, ST_SOI}, {EOF, ST_SCAN},			/* header hi */
/*  2 */	{0xff, ST_SOI}, {0xd8, ST_FIRST}, {EOF, ST_SCAN}, /* header lo */
/*  5 */	{0xff, ST_TAG}, {EOF, ST_SCAN},		/* tag hi (first) */
/*  7 */	{0xff, ST_TAG}, {0x00, ST_BODY},
		{0xd0, ST_BODY}, {0xd1, ST_BODY}, {0xd2, ST_BODY},
		{0xd3, ST_BODY}, {0xd4, ST_BODY}, {0xd5, ST_BODY},
		{0xd6, ST_BODY}, {0xd7, ST_BODY}, {0xd8, ST_BODY},
		{0xd9, ST_END}, {EOF, ST_LEN_HI},		/* tag lo */
/* 20 */	{EOF, ST_LEN_LO},				/* len hi */
/* 21 */	{EOF, ST_PARAM},				/* len lo */
/* 22 */	{EOF, ST_PARAM},				/* parameters */
/* 23 */	{0xff, ST_TAG}, {EOF, ST_BODY}			/* body */
/* 25 */							/* end */
};

/* Build the name of the ctr-th output file for input file fn: the
 * directory part of fn is dropped and the extension (everything from
 * the last '.', if any) is replaced by ".jpg" for ctr == 0 and by
 * "-<ctr>.jpg" otherwise.
 *
 * Returns a malloc()ed string the caller must free(), or NULL if the
 * allocation failed.
 */
static char *outname(const char *fn, unsigned int ctr)
{
	const char *base = strrchr(fn, '/');
	char *name, *ext;
	size_t room;

	base = base ? base + 1 : fn;

	/* Worst case appended to the complete base name: '-', the
	 * decimal digits of ctr (at most one per three bits, rounded
	 * up), ".jpg" and the terminating NUL.
	 */
	room = 1 + (sizeof ctr * CHAR_BIT + 2) / 3 + 4 + 1;
	if (!(name = malloc(strlen(base) + room)))
		return NULL;
	strcpy(name, base);

	/* find extension */
	if (!(ext = strrchr(name, '.')))
		ext = name + strlen(name);

	if (!ctr)
		strcpy(ext, ".jpg");
	else
		sprintf(ext, "-%u.jpg", ctr);
	return name;
}

/* Close output stream os and complain on stderr if any write to it,
 * or the close itself, failed.  name is only used in the message.
 */
static void finish(FILE *os, const char *name)
{
	/* Sample the error flag first; the stream is gone after fclose. */
	int bad = ferror(os);

	if (fclose(os) == EOF || bad)
		fprintf(stderr, "error writing %s.\n", name);
}

/* salvage jpegs from an input file
 *
 * Reads fn byte by byte through the automaton in stab[] and copies
 * every jpeg it recognizes to a file of its own in the current
 * directory (see outname() for the naming).  The name of each output
 * file is printed on stdout as it is created; problems are reported
 * on stderr and never abort the program.
 *
 * NOTE(review): the output name is derived from the input's base
 * name, so for an input such as "foo.jpg" in the current directory
 * the first output file is the input file itself and opening it for
 * writing truncates the input.  Run from a different directory.
 */
void salvag(char *fn)
{
	FILE *is, *os = NULL;
	char *ofn = NULL;	/* name of os; non-NULL exactly while os is */
	int c, st = ST_SCAN;
	unsigned int len = 0, ctr = 0;

	if (!(is = fopen(fn, "rb"))) {
		fprintf(stderr, "couldn't open %s for reading.\n", fn);
		return;
	}

	while ((c = getc(is)) != EOF) {
		switch (st) {
		case ST_PARAM:
			/* Inside a marker segment parameter
			 * section, count down bytes and if done,
			 * enter entropy-coded data segment mode.
			 * The byte that finds len at zero is already
			 * past the segment, so the table lookup below
			 * must treat it as a body byte.
			 */
			if (!len--)
				st = ST_BODY;
			/* FALLTHROUGH */
		case ST_BODY:
			/* Inside an entropy-coded data segment,
			 * simply output everything.
			 */
			if (os)
				putc(c, os);
			break;
		case ST_TAG:
			/* os is NULL here if the output file could
			 * not be created; keep parsing, write nothing.
			 */
			if (!os)
				break;
			/* If the heuristics have recognized
			 * a jpeg file, but we hit 0xff 0xd8, then
			 * the heuristic failed.  Start the output
			 * file over and assume the jpeg file instead
			 * begins at the current 0xff 0xd8.
			 *
			 * Reopening with "wb" truncates the file, so
			 * nothing of the discarded attempt survives
			 * even if the real image turns out shorter.
			 */
			assert(stab[st + 10].symb == MARK_SOI);
			if (c == MARK_SOI) {
				/*fprintf(stderr, "backup.\n");*/
				if (!(os = freopen(ofn, "wb", os))) {
					fprintf(stderr, "couldn't open %s for "
						"writing.\n", ofn);
					free(ofn);
					ofn = NULL;
					break;
				}
				putc(MARK_PREFIX, os);
			}
			putc(c, os);
			break;
		case ST_FIRST:
			assert(stab[st].stat == ST_TAG);
			/* Check if 0xff 0xd8 0xff was recognized,
			 * the heuristics to assume start of jpeg file.
			 */
			if (c != stab[st].symb)
				break;

			/* The counter advances for every recognized
			 * start, whether or not a file gets written.
			 */
			if (!(ofn = outname(fn, ctr++))) {
				fprintf(stderr, "out of memory.\n");
				break;
			}
			if (!(os = fopen(ofn, "wb"))) {
				fprintf(stderr, "couldn't open %s for "
					"writing.\n", ofn);
				free(ofn);
				ofn = NULL;
				break;
			}
			puts(ofn);

			/* Put (partial) jpeg markers recognized so far */
			putc(MARK_PREFIX, os);
			putc(MARK_SOI, os);
			putc(MARK_PREFIX, os);
			break;
		case ST_LEN_HI:
			/*fprintf(stderr, "%ld\n", ftell(is) - 3);*/
			len = (unsigned int)c << 8;
			if (os)
				putc(c, os);
			break;
		case ST_LEN_LO:
			len |= (unsigned int)c;
			/* The length counts its own two bytes.  A
			 * value below 2 is corrupt; treat it as an
			 * empty parameter section instead of letting
			 * the subtraction wrap around and swallow the
			 * rest of the input.
			 */
			len = len < 2 ? 0 : len - 2;
			if (os)
				putc(c, os);
			break;
		default:
			/* ST_SCAN, ST_SOI: nothing to copy yet. */
			break;
		}

		/* Take the first transition matching c; the EOF entry
		 * closing every list matches anything.
		 */
		while (stab[st].symb != EOF && stab[st].symb != c)
			st++;
		st = stab[st].stat;

		/* Check if end of jpeg file has been recognized,
		 * if so, close the file and reset the automaton.
		 */
		if (st == ST_END) {
			if (os) {
				finish(os, ofn);
				free(ofn);
			}
			os = NULL;
			ofn = NULL;
			st = ST_SCAN;
		}
		/*printf("%d,%d|", c, st);*/
	}

	/* If there is still some output file open, then either the
	 * heuristics couldn't handle the input file or it has been
	 * truncated for some reason.
	 */
	if (os) {
		fprintf(stderr, "truncated.\n");
		finish(os, ofn);
		free(ofn);
	}
	/* puts(""); */
	fclose(is);
}

/* Salvage jpegs from every file named on the command line. */
int main(int argc, char *argv[])
{
	int i;

	for (i = 1; i < argc; i++)
		salvag(argv[i]);

	return EXIT_SUCCESS;
}