Any2Text
From Ghoulwiki
Revision as of 13:40, 15 February 2008 by Ghoulsblade (talk | contribs)
<?php
/*
 * any2text: print a small attribute header for a file, followed by the
 * file's contents converted to plain text by an external tool.
 *
 * Usage: php <this-script> <file>
 *
 * Output on stdout:
 *   mime=<detected mime> [<part before "/"> <part after "/">]
 *   ext=<text after the last "." in the file name>
 *   @uri=<file name as given>
 *   @title=<basename of the file>
 *   <empty line>
 *   <extracted text, if a converter matched>
 *
 * Converter selection (first match wins):
 *   mime image/*                    -> convert (to PNG) + gocr (OCR)
 *   ext sxw/odt/ods/sxc/odp/sxi     -> unzip -p ... content.xml | o3totxt
 *   ext ppt                         -> ppthtml | html2text
 *   ext xls                         -> xls2csv
 *   ext doc                         -> catdoc
 *   ext pdf                         -> pdftotext
 *   mime text/*                     -> cat
 * Files that match none of the above get the header only.
 *
 * Install the following programs:
 *   a2ps, catdoc, o3read, odt2txt, poppler-utils (pdftotext), tth (tex2html),
 *   wp2x, netpbm, gocr, imagemagick, ppthtml
 *
 * Exit status: 1 when no file argument is given, the file cannot be read, or
 * no temporary file can be created; 0 otherwise (converter failures are
 * reported on stderr but do not change the exit status).
 */

if ($argc < 2) {
    fwrite(STDERR, "usage: php {$argv[0]} <file>\n");
    exit(1);
}

$filename = $argv[1];

if (!is_file($filename) || !is_readable($filename)) {
    fwrite(STDERR, "any2text: cannot read file: $filename\n");
    exit(1);
}

// Quote one argument for a POSIX shell: wrap it in single quotes and spell
// every embedded single quote as '\''. This is done by hand rather than with
// escapeshellarg() because that function can drop bytes that are not valid in
// the current LC_CTYPE locale, which would silently change non-ASCII names.
$quote = function ($arg) {
    return "'" . str_replace("'", "'\\''", $arg) . "'";
};

// A name starting with "-" would be parsed as an option by the tools below,
// so address it relative to the current directory instead. Only the shell
// commands use this form; the printed header keeps the name as given.
$shellPath = (strncmp($filename, '-', 1) === 0) ? './' . $filename : $filename;
$fileArg = $quote($shellPath);

// Detect the MIME type; fall back to an empty string if detection fails so
// the header is still printed and extension-based dispatch can still run.
$mime = '';
$finfo = finfo_open(FILEINFO_MIME);
if ($finfo !== false) {
    $detected = finfo_file($finfo, $filename);
    finfo_close($finfo);
    if ($detected !== false) {
        $mime = $detected;
    }
}

// array_pad() guarantees both halves exist even when $mime has no "/".
list($mimebase, $mimeext) = array_pad(explode('/', $mime, 2), 2, '');

// array_pop() takes its argument by reference, so it needs a real variable.
$nameParts = explode('.', $filename);
$ext = array_pop($nameParts);

echo "mime=$mime [$mimebase $mimeext]\n";
echo "ext=$ext\n";
echo "@uri=$filename\n";
echo "@title=" . basename($filename) . "\n";
echo "\n";

$openDocumentExts = ['sxw', 'odt', 'ods', 'sxc', 'odp', 'sxi'];

if ($mimebase === 'image') {
    // OCR path: normalise the image to PNG in a private temp file, then let
    // gocr print the recognised text. tempnam() reserves a unique name; the
    // ".png" sibling is the file actually written, so that the converter and
    // gocr see a PNG extension.
    $tmpBase = tempnam(sys_get_temp_dir(), 'any2text');
    if ($tmpBase === false) {
        fwrite(STDERR, "any2text: cannot create a temporary file\n");
        exit(1);
    }
    $tmpPng = $tmpBase . '.png';

    $convertOutput = [];
    $convertStatus = 0;
    exec('convert ' . $fileArg . ' ' . $quote($tmpPng), $convertOutput, $convertStatus);

    if ($convertStatus === 0 && is_file($tmpPng)) {
        system('gocr -e /dev/null ' . $quote($tmpPng));
    } else {
        fwrite(STDERR, "any2text: convert failed (exit status $convertStatus), skipping OCR\n");
    }

    if (is_file($tmpPng)) {
        unlink($tmpPng);
    }
    if (is_file($tmpBase)) {
        unlink($tmpBase);
    }
} elseif (in_array($ext, $openDocumentExts, true)) {
    system('unzip -p ' . $fileArg . ' content.xml | o3totxt');
} elseif ($ext === 'ppt') {
    system('ppthtml ' . $fileArg . ' | html2text');
} elseif ($ext === 'xls') {
    system('xls2csv ' . $fileArg);
} elseif ($ext === 'doc') {
    system('catdoc ' . $fileArg);
} elseif ($ext === 'pdf') {
    system('pdftotext ' . $fileArg . ' -');
} elseif ($mimebase === 'text') {
    system('cat ' . $fileArg);
}
?>