|
|
| Line 1: |
Line 1: |
| − | <pre><nowiki>
| |
| − | <?php
| |
| − | /* // install the following programs
| |
| − | a2ps, catdoc (also provides xls2csv), o3read (provides o3totxt), odt2txt, poppler-utils (pdftotext), tth (tex2html), wp2x, netpbm, gocr, imagemagick (convert), ppthtml, html2text, unzip
| |
| − | */
| |
| | | | |
/*
 * Convert the file named by the first command-line argument to plain text.
 *
 * Output on stdout: a small header (mime=, ext=, @uri=, @title=, blank line)
 * followed by whatever the selected external converter prints.
 *
 * Converter selection, in order:
 *   - MIME type image/*            -> convert + gocr (OCR)
 *   - sxw/odt/ods/sxc/odp/sxi      -> unzip content.xml | o3totxt
 *   - ppt                          -> ppthtml | html2text
 *   - xls                          -> xls2csv
 *   - doc                          -> catdoc
 *   - pdf                          -> pdftotext
 *   - MIME type text/*             -> cat
 * Anything else prints the header only.
 *
 * Exit status is 1 when no argument is given, when the file cannot be read,
 * or when no temporary file can be created for the OCR step.
 */

/**
 * Quote a string so a POSIX shell treats it as exactly one literal word.
 *
 * Wraps the value in single quotes and rewrites every embedded single quote
 * as '\'' (close quote, escaped quote, reopen quote).
 * NOTE(review): escapeshellarg() is deliberately not used because it can drop
 * non-ASCII bytes depending on the LC_CTYPE locale - confirm this is still a
 * concern on the PHP version this script is deployed with.
 *
 * @param string $value Raw argument text.
 * @return string Shell-safe single word.
 */
function any2text_shell_quote($value)
{
    return "'" . str_replace("'", "'\\''", $value) . "'";
}

if ($argc < 2) {
    exit(1);
}

$filename = $argv[1];

if (!is_file($filename) || !is_readable($filename)) {
    fwrite(STDERR, "cannot read file: $filename\n");
    exit(1);
}

// Detect the MIME type; fall back to an empty string when fileinfo fails so
// the header is still printed and extension-based dispatch still works.
$mime = '';
$finfo = finfo_open(FILEINFO_MIME);
if ($finfo !== false) {
    $detected = finfo_file($finfo, $filename);
    finfo_close($finfo);
    if ($detected !== false) {
        $mime = $detected;
    }
}

// array_pad() guarantees two elements even when $mime contains no "/".
list($mimebase, $mimeext) = array_pad(explode('/', $mime, 2), 2, '');

// pathinfo() looks only at the final path component, so a dotted directory
// name or a file without any dot yields an empty extension.
$ext = pathinfo($filename, PATHINFO_EXTENSION);
$extLower = strtolower($ext); // match ".PDF", ".Doc", ... as well

echo "mime=$mime [$mimebase $mimeext]\n";
echo "ext=$ext\n";

echo "@uri=$filename\n";
echo "@title=" . basename($filename) . "\n";
echo "\n";

// A name starting with "-" would be parsed as an option by the tools below;
// such a path is necessarily relative, so prefixing "./" keeps it the same file.
$shellPath = (strncmp($filename, '-', 1) === 0) ? './' . $filename : $filename;
$fileArg = any2text_shell_quote($shellPath);

$openDocumentExts = array('sxw', 'odt', 'ods', 'sxc', 'odp', 'sxi');

if ($mimebase === 'image') {
    // Use a unique temporary file so concurrent runs do not overwrite each
    // other and the current directory does not need to be writable.
    $tmpBase = tempnam(sys_get_temp_dir(), 'any2text');
    if ($tmpBase === false) {
        fwrite(STDERR, "cannot create temporary file\n");
        exit(1);
    }
    // Keep the ".png" suffix the original intermediate file name had.
    $tmpPng = $tmpBase . '.png';
    $tmpArg = any2text_shell_quote($tmpPng);

    $convertOutput = array();
    $convertStatus = 1;
    exec("convert $fileArg $tmpArg", $convertOutput, $convertStatus);
    if ($convertStatus === 0) {
        system("gocr -e /dev/null $tmpArg");
    }

    foreach (array($tmpPng, $tmpBase) as $tmpPath) {
        if (is_file($tmpPath)) {
            unlink($tmpPath);
        }
    }
} elseif (in_array($extLower, $openDocumentExts, true)) {
    system("unzip -p $fileArg content.xml | o3totxt");
} elseif ($extLower === 'ppt') {
    system("ppthtml $fileArg | html2text");
} elseif ($extLower === 'xls') {
    system("xls2csv $fileArg");
} elseif ($extLower === 'doc') {
    system("catdoc $fileArg");
} elseif ($extLower === 'pdf') {
    system("pdftotext $fileArg -");
} elseif ($mimebase === 'text') {
    system("cat $fileArg");
}
| |
| − |
| |
| − | ?>
| |
| − | </nowiki></pre>
| |