Any2Text
From Ghoulwiki
<?php
/*
 * Install the following programs before running this script:
 * a2ps, catdoc, o3read, odt2txt, poppler-utils (pdftotext), tth (tex2html), wp2x, netpbm, gocr, imagemagick, ppthtml, unzip, html2text
 */
// Any2Text: print a small metadata header for the file named by the first
// command-line argument, then dump its textual content to stdout by handing
// it to an external converter chosen from its MIME type or file extension.
//
// Exit status is 1 when no file argument is given or when the MIME type of
// the file cannot be determined.
if ($argc < 2) {
    fwrite(STDERR, "usage: php {$argv[0]} <file>\n");
    exit(1);
}
$filename = $argv[1];

// Sniff the MIME type from the file content (may carry a "; charset=..." suffix).
$finfo = finfo_open(FILEINFO_MIME);
$mime = ($finfo === false) ? false : finfo_file($finfo, $filename);
if ($finfo !== false) {
    finfo_close($finfo);
}
if ($mime === false) {
    fwrite(STDERR, "any2text: cannot determine MIME type of '$filename'\n");
    exit(1);
}

// Pad so that a MIME string without a "/" still yields two parts.
$mimeParts = array_pad(explode('/', $mime, 2), 2, '');
$mimebase = $mimeParts[0];
$mimeext = $mimeParts[1];

// pathinfo() yields '' for names without an extension and ignores dots that
// only appear in directory components.
$ext = pathinfo($filename, PATHINFO_EXTENSION);

echo "mime=$mime [$mimebase $mimeext]\n";
echo "ext=$ext\n";
echo "@uri=$filename\n";
echo "@title=".basename($filename)."\n";
echo "\n";

// The filename is untrusted input to the shell: always pass it escaped.
$fileArg = escapeshellarg($filename);
// Match extensions case-insensitively (FILE.PDF is still a PDF); the header
// above keeps the extension exactly as it appears in the name.
$kind = strtolower($ext);
// OpenOffice/OpenDocument containers whose text lives in content.xml.
$officeExts = array('sxw', 'odt', 'ods', 'sxc', 'odp', 'sxi');

if ($mimebase === 'image') {
    // OCR path: normalise the image to PNG, then run gocr on it. The PNG goes
    // to a per-process path in the system temp dir so concurrent runs do not
    // clobber each other and the current directory need not be writable.
    // The ".png" suffix is kept, as in the original temp file name.
    $tmpPng = sys_get_temp_dir() . '/any2text-' . getmypid() . '.png';
    $pngArg = escapeshellarg($tmpPng);
    exec("convert $fileArg $pngArg");
    if (file_exists($tmpPng)) {
        system("gocr -e /dev/null $pngArg");
        unlink($tmpPng);
    }
} elseif (in_array($kind, $officeExts, true)) {
    system("unzip -p $fileArg content.xml | o3totxt");
} elseif ($kind === 'ppt') {
    system("ppthtml $fileArg | html2text");
} elseif ($kind === 'xls') {
    system("xls2csv $fileArg");
} elseif ($kind === 'doc') {
    system("catdoc $fileArg");
} elseif ($kind === 'pdf') {
    system("pdftotext $fileArg -");
} elseif ($mimebase === 'text') {
    system("cat $fileArg");
}
?>