Any2Text

From Ghoulwiki
Jump to: navigation, search
<?php
/*
 * Install the following programs before running this script:
 * a2ps, catdoc (provides xls2csv), o3read (provides o3totxt), odt2txt, poppler-utils (pdftotext), tth (tex2html), wp2x, netpbm, gocr, imagemagick (convert), ppthtml, unzip, html2text
 */

/**
 * Quote a string so a POSIX shell treats it as one literal argument.
 *
 * Hand-rolled rather than escapeshellarg() because the latter can drop
 * non-ASCII bytes depending on the active LC_CTYPE locale, which would
 * silently change the path handed to the converter.
 *
 * @param string $value Raw argument (e.g. a file path).
 * @return string The argument wrapped in single quotes, with embedded
 *                single quotes escaped as '\''.
 */
function any2text_shell_quote($value)
{
    return "'" . str_replace("'", "'\\''", $value) . "'";
}

/*
 * Any2Text: print a small metadata header for the file given as the first
 * command-line argument, then dump its text content to stdout by delegating
 * to an external converter chosen from the MIME type / file extension.
 *
 * Exit status is 1 when no file argument is supplied (or when no scratch
 * file can be created for OCR).
 */
if ($argc < 2) {
    exit(1);
}

$filename = $argv[1];

// Detect the MIME type; a failed lookup is treated as an empty type so the
// header is still printed and extension-based dispatch can still happen.
$finfo = finfo_open(FILEINFO_MIME);
$mime = false;
if ($finfo !== false) {
    $mime = finfo_file($finfo, $filename);
    finfo_close($finfo);
}
if ($mime === false) {
    $mime = '';
}

// Pad so that a MIME string without "/" cannot cause an undefined offset.
list($mimebase, $mimeext) = array_pad(explode("/", $mime, 2), 2, '');

// pathinfo() only looks at the last path component and yields '' when the
// name has no dot, unlike splitting the whole path on '.'.
$ext = pathinfo($filename, PATHINFO_EXTENSION);
// Compare case-insensitively so "REPORT.PDF" is handled like "report.pdf".
$extKey = strtolower($ext);

echo "mime=$mime [$mimebase $mimeext]\n";
echo "ext=$ext\n";

echo "@uri=$filename\n";
echo "@title=".basename($filename)."\n";
echo "\n";

$quotedFile = any2text_shell_quote($filename);
$openDocumentExts = array("sxw", "odt", "ods", "sxc", "odp", "sxi");

if ($mimebase == "image") {
    // OCR path: convert to PNG in a private scratch file, then run gocr.
    // tempnam() reserves a unique name so concurrent runs do not clobber
    // each other; the ".png" suffix keeps the output format explicit.
    $scratchBase = tempnam(sys_get_temp_dir(), 'any2text-');
    if ($scratchBase === false) {
        fwrite(STDERR, "any2text: unable to create a temporary file\n");
        exit(1);
    }
    $scratchPng = $scratchBase . '.png';
    $quotedPng = any2text_shell_quote($scratchPng);

    exec("convert $quotedFile $quotedPng");
    system("gocr -e /dev/null $quotedPng");

    // convert may have failed, so only remove the PNG if it was written.
    if (file_exists($scratchPng)) {
        unlink($scratchPng);
    }
    unlink($scratchBase);
} elseif (in_array($extKey, $openDocumentExts, true)) {
    // OpenDocument / StarOffice files are zip archives holding content.xml.
    system("unzip -p $quotedFile content.xml | o3totxt");
} elseif ($extKey === "ppt") {
    system("ppthtml $quotedFile | html2text");
} elseif ($extKey === "xls") {
    system("xls2csv $quotedFile");
} elseif ($extKey === "doc") {
    system("catdoc $quotedFile");
} elseif ($extKey === "pdf") {
    system("pdftotext $quotedFile -");
} elseif ($mimebase == "text") {
    // Plain text needs no converter; stream it without spawning a shell.
    readfile($filename);
}

?>