|
|
Line 1: |
Line 1: |
− | <pre><nowiki>
| |
<?php

/*
 * Any-to-text converter (command-line script).
 *
 * Prints a small metadata header for the file given as the first argument
 * (mime type, extension, "@uri" and "@title" lines, then a blank line) and
 * afterwards the plain-text rendering of the file, produced by an external
 * tool chosen from the detected mime type or the file extension.
 *
 * Install the following programs:
 *   a2ps, catdoc, o3read, odt2txt, poppler-utils (pdftotext), tth (tex2html),
 *   wp2x, netpbm, gocr, imagemagick, ppthtml
 *
 * Commands actually invoked below: convert, gocr, unzip, o3totxt, ppthtml,
 * html2text, xls2csv, catdoc, pdftotext.
 */

/**
 * Quote a string so a POSIX shell treats it as exactly one literal argument.
 *
 * Hand-rolled instead of escapeshellarg() because escapeshellarg() may drop
 * bytes that are not valid characters in the current LC_CTYPE locale, which
 * would silently corrupt non-ASCII file names.
 *
 * @param string $arg Raw argument.
 * @return string Single-quoted argument, embedded quotes rewritten as '\''.
 */
function any2text_quote_arg($arg)
{
    return "'" . str_replace("'", "'\\''", $arg) . "'";
}

// A file argument is mandatory; the exit status stays 1 as before.
if ($argc < 2) {
    fwrite(STDERR, "usage: php " . $argv[0] . " <file>\n");
    exit(1);
}

$filename = $argv[1];

// Detect the mime type; fall back to an empty string when detection fails so
// the header is still printed and the extension-based branches can still run.
$mime = false;
$finfo = finfo_open(FILEINFO_MIME);
if ($finfo !== false) {
    $mime = finfo_file($finfo, $filename);
    finfo_close($finfo);
}
if ($mime === false) {
    $mime = '';
}

// array_pad() guarantees both parts exist even when $mime has no "/".
list($mimebase, $mimeext) = array_pad(explode("/", $mime, 2), 2, '');

// pathinfo() only looks at the last path component and yields '' when the
// file has no extension; matching below is case-insensitive.
$ext = pathinfo($filename, PATHINFO_EXTENSION);
$extLower = strtolower($ext);

echo "mime=$mime [$mimebase $mimeext]\n";
echo "ext=$ext\n";

echo "@uri=$filename\n";
echo "@title=".basename($filename)."\n";
echo "\n";

$quotedFile = any2text_quote_arg($filename);

if ($mimebase === "image") {
    // OCR path: convert the image to PNG, then run gocr on it. A unique
    // temporary name avoids clashes between concurrent runs; the ".png"
    // suffix is kept so the tools see the same kind of file name as before.
    $tmpBase = tempnam(sys_get_temp_dir(), 'any2text');
    if ($tmpBase === false) {
        fwrite(STDERR, "cannot create temporary file\n");
        exit(1);
    }
    $tmpPng = $tmpBase . '.png';

    exec("convert " . $quotedFile . " " . any2text_quote_arg($tmpPng));
    if (is_file($tmpPng)) {
        system("gocr -e /dev/null " . any2text_quote_arg($tmpPng));
        unlink($tmpPng);
    }
    if (is_file($tmpBase)) {
        unlink($tmpBase);
    }
} elseif (in_array($extLower, array("sxw", "odt", "ods", "sxc", "odp", "sxi"), true)) {
    // OpenOffice/OpenDocument: the text lives in content.xml inside the zip.
    system("unzip -p " . $quotedFile . " content.xml | o3totxt");
} elseif ($extLower === "ppt") {
    system("ppthtml " . $quotedFile . " | html2text");
} elseif ($extLower === "xls") {
    system("xls2csv " . $quotedFile);
} elseif ($extLower === "doc") {
    system("catdoc " . $quotedFile);
} elseif ($extLower === "pdf") {
    system("pdftotext " . $quotedFile . " -");
} elseif ($mimebase === "text") {
    // Plain text needs no converter; stream it without spawning a shell.
    readfile($filename);
}
// Any other type: only the header is printed.

?>
| |
− | </nowiki></pre>
| |