/* jpgsalv.c vi:se ts=8 sw=8: */
/* jpgsalv--Jpeg salvager
* Copyright (C) 2005 Peter Backes
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* There are several things which make this program distinctive from
* the various 'jpeg extractors' out there. It is not written in an
* obscure language such as java, it is fast and it is stateful in that
* it actually understands the basic jpeg structures. That is, instead
* of simply looking for a start marker and copying all the data from
* there until the end marker, it actually keeps some state. Especially
* it can handle some cases of bogus start markers and it understands
* markers with parameters. This way it is not fooled by parameter sections
* of marker segments containing end markers, which happens for example
* for embedded thumbnails.
*/
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* This program never uses threads, so the quicker unlocked character
 * I/O routines can stand in for getc and putc.  They are only swapped in
 * when the C library exposes them as macros under these names; otherwise
 * the regular getc and putc stay in effect.
 */
#if defined(_IO_getc_unlocked)
# undef getc
# define getc _IO_getc_unlocked
#endif
#if defined(_IO_putc_unlocked)
# undef putc
# define putc _IO_putc_unlocked
#endif
/* Transition table of the automaton that scans a file for jpeg data.
 *
 * All rows belonging to one state are stored back to back, and a state
 * is identified by the index of its first row.  To take a transition,
 * the rows of the current state are walked until one matches the input
 * byte or is the state's default row (symb == EOF); that row's stat is
 * the next state.  State 25 (end of jpeg) has no rows of its own.
 */
static const struct {
	int symb;	/* input byte to match, EOF marks the default row */
	int stat;	/* state to go to when this row is taken */
} stab[] = {
	/* state 0: header hi */
	{0xff, 2},
	{EOF, 0},

	/* state 2: header lo */
	{0xff, 2},
	{0xd8, 5},
	{EOF, 0},

	/* state 5: tag hi (first) */
	{0xff, 7},
	{EOF, 0},

	/* state 7: tag lo */
	{0xff, 7},
	{0x00, 23},
	{0xd0, 23},
	{0xd1, 23},
	{0xd2, 23},
	{0xd3, 23},
	{0xd4, 23},
	{0xd5, 23},
	{0xd6, 23},
	{0xd7, 23},
	{0xd8, 23},
	{0xd9, 25},
	{EOF, 20},

	/* state 20: len hi */
	{EOF, 21},

	/* state 21: len lo */
	{EOF, 22},

	/* state 22: parameters */
	{EOF, 22},

	/* state 23: body */
	{0xff, 7},
	{EOF, 23}

	/* state 25: end */
};
/* States of the scanning automaton.  Each value is the index of the
 * state's first row in stab[], so these must be kept in sync with
 * that table.
 */
enum {
	ST_SOI_HI = 0,		/* waiting for the 0xff of a start marker */
	ST_SOI_LO = 2,		/* waiting for the 0xd8 of a start marker */
	ST_TAG_FIRST = 5,	/* first byte after the start marker */
	ST_TAG_LO = 7,		/* second byte of a marker */
	ST_LEN_HI = 20,		/* high byte of a segment length */
	ST_LEN_LO = 21,		/* low byte of a segment length */
	ST_PARAMS = 22,		/* parameter bytes of a marker segment */
	ST_BODY = 23,		/* entropy-coded data */
	ST_END = 25		/* end marker has been seen */
};

/* Close an output file, report a write error on it (if any) and
 * release the memory holding its name.
 */
static void salvag_close(FILE *os, char *ofn)
{
	int failed = ferror(os);

	/* fclose flushes, so it can fail even if no putc did. */
	if (fclose(os) == EOF)
		failed = 1;
	if (failed)
		fprintf(stderr, "error writing %s.\n", ofn);
	free(ofn);
}

/* Salvage jpegs from an input file.
 *
 * fn is the name of the file to scan.  Every jpeg recognized in it is
 * written to a file named after the last path component of fn, with
 * the extension replaced by ".jpg" for the first jpeg and by "-N.jpg"
 * (N = 1, 2, ...) for the following ones.  The name of each output
 * file is printed on stdout; problems are reported on stderr.
 *
 * NOTE: the output name is not compared with fn, so an input file
 * that is opened by its bare name and already ends in ".jpg" gets
 * overwritten by the first output file.
 */
void salvag(char *fn)
{
	FILE *is, *os = NULL;
	char *ofn = NULL;	/* name of os; non-NULL exactly while os is open */
	char *p;
	int c, st = ST_SOI_HI;
	unsigned int len = 0, ctr = 0;

	if (!(is = fopen(fn, "rb"))) {
		fprintf(stderr, "couldn't open %s for reading.\n", fn);
		return;
	}
	while ((c = getc(is)) != EOF) {
		switch (st) {
		case ST_PARAMS:
			/* Inside a marker segment parameter
			 * section, count down bytes and if done,
			 * enter entropy-coded data segment mode.
			 */
			if (!len--)
				st = ST_BODY;
			/* FALLTHROUGH */
		case ST_BODY:
			/* Inside an entropy-coded data segment,
			 * simply output everything.
			 */
			if (os)
				putc(c, os);
			break;
		case ST_TAG_LO:
			if (!os)
				break;
			/* If the heuristics have recognized
			 * a jpeg file, but we hit 0xff 0xd8, then
			 * the heuristic failed.  Start over and
			 * assume the jpeg file instead begins at
			 * the current 0xff 0xd8.
			 *
			 * The output file is reopened for writing
			 * rather than rewound, which truncates it:
			 * nothing of the discarded data can survive
			 * behind a shorter jpeg.
			 */
			assert(stab[st + 10].symb == 0xd8);
			if ((unsigned char)c == stab[st + 10].symb) {
				/* On failure freopen has already closed
				 * the old stream, so only the name is left
				 * to release.
				 */
				if (!(os = freopen(ofn, "wb", os))) {
					fprintf(stderr, "couldn't open %s for "
						"writing.\n", ofn);
					free(ofn);
					ofn = NULL;
					break;
				}
				putc(0xff, os);
			}
			putc(c, os);
			break;
		case ST_TAG_FIRST:
			assert(stab[st].stat == ST_TAG_LO);
			/* Check if 0xff 0xd8 0xff was recognized,
			 * the heuristics to assume start of jpeg file.
			 * No output file can be open here: this state
			 * is only reachable while searching for a start
			 * marker.
			 */
			if ((unsigned char)c != stab[st].symb)
				break;
			/* Compute a file name by replacing the
			 * extension with ".jpg".
			 */
			if ((p = strrchr(fn, '/')))
				p++;
			else
				p = fn;
			/* Room for the base name, "-", the decimal
			 * digits of ctr (bits / 3 + 1 is an upper
			 * bound), ".jpg" and the terminating NUL.
			 */
			ofn = malloc(strlen(p) + sizeof "-.jpg"
				+ sizeof ctr * CHAR_BIT / 3 + 1);
			if (!ofn) {
				/* Skip this jpeg; os stays NULL, so
				 * nothing is written until the next one.
				 */
				fprintf(stderr, "out of memory.\n");
				break;
			}
			strcpy(ofn, p);
			/* find extension */
			if (!(p = strrchr(ofn, '.')))
				p = ofn + strlen(ofn);
			if (!ctr)
				strcpy(p, ".jpg");
			else
				sprintf(p, "-%u.jpg", ctr);
			ctr++;
			if (!(os = fopen(ofn, "wb"))) {
				fprintf(stderr, "couldn't open %s for "
					"writing.\n", ofn);
				free(ofn);
				ofn = NULL;
				break;
			}
			puts(ofn);
			/* Put (partial) jpeg markers recognized so far */
			putc(0xff, os);
			putc(0xd8, os);
			putc(0xff, os);
			break;
		case ST_LEN_HI:
			len = (unsigned int)(unsigned char)c << 8;
			if (os)
				putc(c, os);
			break;
		case ST_LEN_LO:
			len |= (unsigned char)c;
			/* The length counts its own two bytes.  A
			 * (bogus) value below 2 is treated as an empty
			 * parameter section; subtracting blindly would
			 * wrap around and swallow almost 2^32 bytes.
			 */
			len = len >= 2 ? len - 2 : 0;
			if (os)
				putc(c, os);
			break;
		default:
			/* The remaining states produce no output. */
			break;
		}
		/* Look up the transition: walk the rows of the current
		 * state until the input byte or the default row matches.
		 */
		while (stab[st].symb != EOF
				&& stab[st].symb != (unsigned char)c)
			st++;
		st = stab[st].stat;
		/* Check if end of jpeg file has been recognized,
		 * if so, close the file and reset the automaton.
		 */
		if (st == ST_END) {
			if (os)
				salvag_close(os, ofn);
			os = NULL;
			ofn = NULL;
			st = ST_SOI_HI;
		}
	}
	/* If there is still some output file open, then either the
	 * heuristics couldn't handle the input file or it has been
	 * truncated for some reason.
	 */
	if (os) {
		fprintf(stderr, "truncated.\n");
		salvag_close(os, ofn);
	}
	fclose(is);
}
/* Entry point: every command line argument names an input file, and
 * each one is handed to salvag() in the order given.  The exit status
 * is always EXIT_SUCCESS.
 */
int main(int argc, char *argv[])
{
	char **arg = argv;
	int remaining = argc - 1;	/* arguments after the program name */

	while (remaining-- > 0)
		salvag(*++arg);
	return EXIT_SUCCESS;
}