kio Library API Documentation

kmimemagic.cpp

00001 /* This file is part of the KDE libraries 00002 Copyright (C) 2000 Fritz Elfert <fritz@kde.org> 00003 Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com> 00004 00005 This library is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU Library General Public 00007 License version 2 as published by the Free Software Foundation. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public License 00015 along with this library; see the file COPYING.LIB. If not, write to 00016 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00017 Boston, MA 02111-1307, USA. 00018 */ 00019 #include "kmimemagic.h" 00020 #include <kdebug.h> 00021 #include <kapplication.h> 00022 #include <qfile.h> 00023 #include <ksimpleconfig.h> 00024 #include <kstandarddirs.h> 00025 #include <kstaticdeleter.h> 00026 #include <klargefile.h> 00027 #include <assert.h> 00028 00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb); 00030 static void process(struct config_rec* conf, const QString &); 00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes); 00032 static int tagmagic(unsigned char *buf, int nbytes); 00033 static int textmagic(struct config_rec* conf, unsigned char *, int); 00034 00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb); 00036 static int match(struct config_rec* conf, unsigned char *, int); 00037 00038 KMimeMagic* KMimeMagic::s_pSelf; 00039 static KStaticDeleter<KMimeMagic> kmimemagicsd; 00040 00041 KMimeMagic* KMimeMagic::self() 00042 { 00043 if( !s_pSelf ) 00044 initStatic(); 00045 return s_pSelf; 00046 } 00047 00048 void KMimeMagic::initStatic() 00049 { 00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); 00051 s_pSelf->setFollowLinks( true ); 00052 } 00053 00054 #include <stdio.h> 00055 #include <unistd.h> 00056 #include <stdlib.h> 00057 #include <sys/wait.h> 00058 #include <sys/types.h> 00059 #include <sys/stat.h> 00060 #include <fcntl.h> 00061 #include <errno.h> 00062 #include <ctype.h> 00063 #include <time.h> 00064 #include <utime.h> 00065 #include <stdarg.h> 00066 #include <qregexp.h> 00067 #include <qstring.h> 00068 00069 //#define MIME_MAGIC_DEBUG_TABLE // untested 00070 00071 // Uncomment to debug the config-file parsing phase 00072 //#define DEBUG_APPRENTICE 00073 // Uncomment to debug the matching phase 00074 //#define DEBUG_MIMEMAGIC 00075 00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE) 00077 #define DEBUG_LINENUMBERS 00078 #endif 00079 00080 /* 00081 * Buitltin Mime types 00082 */ 00083 #define MIME_BINARY_UNKNOWN "application/octet-stream" 00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable" 00085 #define MIME_BINARY_ZEROSIZE "application/x-zerosize" 00086 #define MIME_TEXT_UNKNOWN "text/plain" 00087 #define MIME_TEXT_PLAIN "text/plain" 00088 #define MIME_INODE_DIR "inode/directory" 00089 #define MIME_INODE_CDEV "inode/chardevice" 00090 #define MIME_INODE_BDEV "inode/blockdevice" 00091 #define MIME_INODE_FIFO "inode/fifo" 00092 #define MIME_INODE_LINK "inode/link" 00093 #define MIME_INODE_SOCK "inode/socket" 00094 // Following should go in magic-file - Fritz 00095 #define MIME_APPL_TROFF "application/x-troff" 00096 #define MIME_APPL_TAR "application/x-tar" 00097 #define MIME_TEXT_FORTRAN "text/x-fortran" 00098 00099 #define MAXMIMESTRING 256 00100 00101 #define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ 00102 #define MAXDESC 50 /* max leng of text description */ 00103 #define MAXstring 64 /* max leng of "string" types */ 00104 00105 typedef union VALUETYPE { 00106 unsigned char b; 00107 unsigned short h; 00108 unsigned long l; 00109 char s[MAXstring]; 00110 unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ 00111 unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ 00112 } VALUETYPE; 00113 00114 struct magic { 00115 struct magic *next; /* link to next entry */ 00116 #ifdef DEBUG_LINENUMBERS 00117 int lineno; /* line number from magic file - doesn't say from which one ;) */ 00118 #endif 00119 00120 short flag; 00121 #define INDIR 1 /* if '>(...)' appears, */ 00122 #define UNSIGNED 2 /* comparison is unsigned */ 00123 short cont_level; /* level of ">" */ 00124 struct { 00125 char type; /* byte short long */ 00126 long offset; /* offset from indirection */ 00127 } in; 00128 long offset; /* offset to magic number */ 00129 unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ 00130 char type; /* int, short, long or string. */ 00131 char vallen; /* length of string value, if any */ 00132 #define BYTE 1 00133 #define SHORT 2 00134 #define LONG 4 00135 #define STRING 5 00136 #define DATE 6 00137 #define BESHORT 7 00138 #define BELONG 8 00139 #define BEDATE 9 00140 #define LESHORT 10 00141 #define LELONG 11 00142 #define LEDATE 12 00143 VALUETYPE value; /* either number or string */ 00144 unsigned long mask; /* mask before comparison with value */ 00145 char nospflag; /* suppress space character */ 00146 00147 /* NOTE: this string is suspected of overrunning - find it! */ 00148 char desc[MAXDESC]; /* description */ 00149 }; 00150 00151 /* 00152 * data structures for tar file recognition 00153 * -------------------------------------------------------------------------- 00154 * Header file for public domain tar (tape archive) program. 00155 * 00156 * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John 00157 * Gilmore, ihnp4!hoptoad!gnu. 00158 * 00159 * Header block on tape. 00160 * 00161 * I'm going to use traditional DP naming conventions here. A "block" is a big 00162 * chunk of stuff that we do I/O on. A "record" is a piece of info that we 00163 * care about. Typically many "record"s fit into a "block". 00164 */ 00165 #define RECORDSIZE 512 00166 #define NAMSIZ 100 00167 #define TUNMLEN 32 00168 #define TGNMLEN 32 00169 00170 union record { 00171 char charptr[RECORDSIZE]; 00172 struct header { 00173 char name[NAMSIZ]; 00174 char mode[8]; 00175 char uid[8]; 00176 char gid[8]; 00177 char size[12]; 00178 char mtime[12]; 00179 char chksum[8]; 00180 char linkflag; 00181 char linkname[NAMSIZ]; 00182 char magic[8]; 00183 char uname[TUNMLEN]; 00184 char gname[TGNMLEN]; 00185 char devmajor[8]; 00186 char devminor[8]; 00187 } header; 00188 }; 00189 00190 /* The magic field is filled with this if uname and gname are valid. */ 00191 #define TMAGIC "ustar " /* 7 chars and a null */ 00192 00193 /* 00194 * file-function prototypes 00195 */ 00196 static int is_tar(unsigned char *, int); 00197 static unsigned long signextend(struct magic *, unsigned long); 00198 static int getvalue(struct magic *, char **); 00199 static int hextoint(int); 00200 static char *getstr(char *, char *, int, int *); 00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int); 00202 static int mcheck(union VALUETYPE *, struct magic *); 00203 static int mconvert(union VALUETYPE *, struct magic *); 00204 static long from_oct(int, char *); 00205 00206 /* 00207 * includes for ASCII substring recognition formerly "names.h" in file 00208 * command 00209 * 00210 * Original notes: names and types used by ascmagic in file(1). 00211 * These tokens are 00212 * here because they can appear anywhere in the first HOWMANY bytes, while 00213 * tokens in /etc/magic must appear at fixed offsets into the file. Don't 00214 * make HOWMANY too high unless you have a very fast CPU. 00215 */ 00216 00217 /* these types are used calculate index to 'types': keep em in sync! */ 00218 /* HTML inserted in first because this is a web server module now */ 00219 /* ENG removed because stupid */ 00220 #define L_HTML 0x001 /* HTML */ 00221 #define L_C 0x002 /* first and foremost on UNIX */ 00222 #define L_MAKE 0x004 /* Makefiles */ 00223 #define L_PLI 0x008 /* PL/1 */ 00224 #define L_MACH 0x010 /* some kinda assembler */ 00225 #define L_PAS 0x020 /* Pascal */ 00226 #define L_JAVA 0x040 /* Java source */ 00227 #define L_CPP 0x080 /* C++ */ 00228 #define L_MAIL 0x100 /* Electronic mail */ 00229 #define L_NEWS 0x200 /* Usenet Netnews */ 00230 #define L_DIFF 0x400 /* Output of diff */ 00231 #define L_OBJC 0x800 /* Objective C */ 00232 00233 #define P_HTML 0 /* HTML */ 00234 #define P_C 1 /* first and foremost on UNIX */ 00235 #define P_MAKE 2 /* Makefiles */ 00236 #define P_PLI 3 /* PL/1 */ 00237 #define P_MACH 4 /* some kinda assembler */ 00238 #define P_PAS 5 /* Pascal */ 00239 #define P_JAVA 6 /* Java source */ 00240 #define P_CPP 7 /* C++ */ 00241 #define P_MAIL 8 /* Electronic mail */ 00242 #define P_NEWS 9 /* Usenet Netnews */ 00243 #define P_DIFF 10 /* Output of diff */ 00244 #define P_OBJC 11 /* Objective C */ 00245 00246 typedef struct asc_type { 00247 const char *type; 00248 int kwords; 00249 double weight; 00250 } asc_type; 00251 00252 static const asc_type types[] = { 00253 { "text/html", 19, 2 }, // 10 items but 10 different words only 00254 { "text/x-c", 13, 1 }, 00255 { "text/x-makefile", 4, 1.9 }, 00256 { "text/x-pli", 1, 3 }, 00257 { "text/x-assembler", 6, 2.1 }, 00258 { "text/x-pascal", 1, 1 }, 00259 { "text/x-java", 12, 1 }, 00260 { "text/x-c++", 19, 1 }, 00261 { "message/rfc822", 4, 1.9 }, 00262 { "message/news", 3, 2 }, 00263 { "text/x-diff", 4, 2 }, 00264 { "text/x-objc", 10, 1 } 00265 }; 00266 00267 #define NTYPES (sizeof(types)/sizeof(asc_type)) 00268 00269 static struct names { 00270 const char *name; 00271 short type; 00272 } const names[] = { 00273 { 00274 "<html", L_HTML 00275 }, 00276 { 00277 "<HTML", L_HTML 00278 }, 00279 { 00280 "<head", L_HTML 00281 }, 00282 { 00283 "<HEAD", L_HTML 00284 }, 00285 { 00286 "<body", L_HTML 00287 }, 00288 { 00289 "<BODY", L_HTML 00290 }, 00291 { 00292 "<title", L_HTML 00293 }, 00294 { 00295 "<TITLE", L_HTML 00296 }, 00297 { 00298 "<h1", L_HTML 00299 }, 00300 { 00301 "<H1", L_HTML 00302 }, 00303 { 00304 "<a", L_HTML 00305 }, 00306 { 00307 "<A", L_HTML 00308 }, 00309 { 00310 "<img", L_HTML 00311 }, 00312 { 00313 "<IMG", L_HTML 00314 }, 00315 { 00316 "<!--", L_HTML 00317 }, 00318 { 00319 "<!doctype", L_HTML 00320 }, 00321 { 00322 "<!DOCTYPE", L_HTML 00323 }, 00324 { 00325 "<div", L_HTML 00326 }, 00327 { 00328 "<DIV", L_HTML 00329 }, 00330 { 00331 "<frame", L_HTML 00332 }, 00333 { 00334 "<FRAME", L_HTML 00335 }, 00336 { 00337 "<frameset", L_HTML 00338 }, 00339 { 00340 "<FRAMESET", L_HTML 00341 }, 00342 { 00343 "<script", L_HTML 00344 }, 00345 { 00346 "<SCRIPT", L_HTML 00347 }, 00348 { 00349 "/*", L_C|L_CPP|L_JAVA|L_OBJC 00350 }, 00351 { 00352 "//", L_C|L_CPP|L_JAVA|L_OBJC 00353 }, 00354 { 00355 "#include", L_C|L_CPP 00356 }, 00357 { 00358 "#ifdef", L_C|L_CPP 00359 }, 00360 { 00361 "#ifndef", L_C|L_CPP 00362 }, 00363 { 00364 "bool", L_C|L_CPP 00365 }, 00366 { 00367 "char", L_C|L_CPP|L_JAVA|L_OBJC 00368 }, 00369 { 00370 "int", L_C|L_CPP|L_JAVA|L_OBJC 00371 }, 00372 { 00373 "float", L_C|L_CPP|L_JAVA|L_OBJC 00374 }, 00375 { 00376 "void", L_C|L_CPP|L_JAVA|L_OBJC 00377 }, 00378 { 00379 "extern", L_C|L_CPP 00380 }, 00381 { 00382 "struct", L_C|L_CPP 00383 }, 00384 { 00385 "union", L_C|L_CPP 00386 }, 00387 { 00388 "implements", L_JAVA 00389 }, 00390 { 00391 "super", L_JAVA 00392 }, 00393 { 00394 "import", L_JAVA 00395 }, 00396 { 00397 "class", L_CPP|L_JAVA 00398 }, 00399 { 00400 "public", L_CPP|L_JAVA 00401 }, 00402 { 00403 "private", L_CPP|L_JAVA 00404 }, 00405 { 00406 "explicit", L_CPP 00407 }, 00408 { 00409 "virtual", L_CPP 00410 }, 00411 { 00412 "namespace", L_CPP 00413 }, 00414 { 00415 "#import", L_OBJC 00416 }, 00417 { 00418 "@interface", L_OBJC 00419 }, 00420 { 00421 "@implementation", L_OBJC 00422 }, 00423 { 00424 "@protocol", L_OBJC 00425 }, 00426 { 00427 "CFLAGS", L_MAKE 00428 }, 00429 { 00430 "LDFLAGS", L_MAKE 00431 }, 00432 { 00433 "all:", L_MAKE 00434 }, 00435 { 00436 ".PHONY:", L_MAKE 00437 }, 00438 { 00439 "srcdir", L_MAKE 00440 }, 00441 { 00442 "exec_prefix", L_MAKE 00443 }, 00444 /* 00445 * Too many files of text have these words in them. Find another way 00446 * to recognize Fortrash. 00447 */ 00448 { 00449 ".ascii", L_MACH 00450 }, 00451 { 00452 ".asciiz", L_MACH 00453 }, 00454 { 00455 ".byte", L_MACH 00456 }, 00457 { 00458 ".even", L_MACH 00459 }, 00460 { 00461 ".globl", L_MACH 00462 }, 00463 { 00464 "clr", L_MACH 00465 }, 00466 { 00467 "(input", L_PAS 00468 }, 00469 { 00470 "dcl", L_PLI 00471 }, 00472 { 00473 "Received:", L_MAIL 00474 }, 00475 /* we now stop at '>' for tokens, so this one won't work { 00476 ">From", L_MAIL 00477 },*/ 00478 { 00479 "Return-Path:", L_MAIL 00480 }, 00481 { 00482 "Cc:", L_MAIL 00483 }, 00484 { 00485 "Newsgroups:", L_NEWS 00486 }, 00487 { 00488 "Path:", L_NEWS 00489 }, 00490 { 00491 "Organization:", L_NEWS 00492 }, 00493 { 00494 "---", L_DIFF 00495 }, 00496 { 00497 "+++", L_DIFF 00498 }, 00499 { 00500 "***", L_DIFF 00501 }, 00502 { 00503 "@@", L_DIFF 00504 }, 00505 { 00506 NULL, 0 00507 } 00508 }; 00509 00520 class KMimeMagicUtimeConf 00521 { 00522 public: 00523 KMimeMagicUtimeConf() 00524 { 00525 tmpDirs << QString::fromLatin1("/tmp"); // default value 00526 00527 // The trick is that we also don't want the user to override globally set 00528 // directories. So we have to misuse KStandardDirs :} 00529 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" ); 00530 if ( !confDirs.isEmpty() ) 00531 { 00532 QString globalConf = confDirs.last() + "kmimemagicrc"; 00533 if ( QFile::exists( globalConf ) ) 00534 { 00535 KSimpleConfig cfg( globalConf ); 00536 cfg.setGroup( "Settings" ); 00537 tmpDirs = cfg.readListEntry( "atimeDirs" ); 00538 } 00539 if ( confDirs.count() > 1 ) 00540 { 00541 QString localConf = confDirs.first() + "kmimemagicrc"; 00542 if ( QFile::exists( localConf ) ) 00543 { 00544 KSimpleConfig cfg( localConf ); 00545 cfg.setGroup( "Settings" ); 00546 tmpDirs += cfg.readListEntry( "atimeDirs" ); 00547 } 00548 } 00549 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) 00550 { 00551 QString dir = *it; 00552 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) 00553 (*it) += '/'; 00554 } 00555 } 00556 #if 0 00557 // debug code 00558 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) 00559 kdDebug(7018) << " atimeDir: " << *it << endl; 00560 #endif 00561 } 00562 00563 bool restoreAccessTime( const QString & file ) const 00564 { 00565 QString dir = file.left( file.findRev( '/' ) ); 00566 bool res = tmpDirs.contains( dir ); 00567 //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; 00568 return res; 00569 } 00570 QStringList tmpDirs; 00571 }; 00572 00573 /* current config */ 00574 struct config_rec { 00575 bool followLinks; 00576 QString resultBuf; 00577 int accuracy; 00578 00579 struct magic *magic, /* head of magic config list */ 00580 *last; 00581 KMimeMagicUtimeConf * utimeConf; 00582 }; 00583 00584 #ifdef MIME_MAGIC_DEBUG_TABLE 00585 static void 00586 test_table() 00587 { 00588 struct magic *m; 00589 struct magic *prevm = NULL; 00590 00591 kdDebug(7018) << "test_table : started" << endl; 00592 for (m = conf->magic; m; m = m->next) { 00593 if (isprint((((unsigned long) m) >> 24) & 255) && 00594 isprint((((unsigned long) m) >> 16) & 255) && 00595 isprint((((unsigned long) m) >> 8) & 255) && 00596 isprint(((unsigned long) m) & 255)) { 00597 //debug("test_table: POINTER CLOBBERED! " 00598 //"m=\"%c%c%c%c\" line=%d", 00599 (((unsigned long) m) >> 24) & 255, 00600 (((unsigned long) m) >> 16) & 255, 00601 (((unsigned long) m) >> 8) & 255, 00602 ((unsigned long) m) & 255, 00603 prevm ? prevm->lineno : -1); 00604 break; 00605 } 00606 prevm = m; 00607 } 00608 } 00609 #endif 00610 00611 #define EATAB {while (isascii((unsigned char) *l) && \ 00612 isspace((unsigned char) *l)) ++l;} 00613 00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno) 00615 { 00616 int ws_offset; 00617 00618 /* delete newline */ 00619 if (line[0]) { 00620 line[strlen(line) - 1] = '\0'; 00621 } 00622 /* skip leading whitespace */ 00623 ws_offset = 0; 00624 while (line[ws_offset] && isspace(line[ws_offset])) { 00625 ws_offset++; 00626 } 00627 00628 /* skip blank lines */ 00629 if (line[ws_offset] == 0) { 00630 return 0; 00631 } 00632 /* comment, do not parse */ 00633 if (line[ws_offset] == '#') 00634 return 0; 00635 00636 /* if we get here, we're going to use it so count it */ 00637 (*rule)++; 00638 00639 /* parse it */ 00640 return (parse(line + ws_offset, lineno) != 0); 00641 } 00642 00643 /* 00644 * apprentice - load configuration from the magic file. 00645 */ 00646 int KMimeMagic::apprentice( const QString& magicfile ) 00647 { 00648 FILE *f; 00649 char line[BUFSIZ + 1]; 00650 int errs = 0; 00651 int lineno; 00652 int rule = 0; 00653 QCString fname; 00654 00655 if (magicfile.isEmpty()) 00656 return -1; 00657 fname = QFile::encodeName(magicfile); 00658 f = fopen(fname, "r"); 00659 if (f == NULL) { 00660 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl; 00661 return -1; 00662 } 00663 00664 /* parse it */ 00665 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) 00666 if (parse_line(line, &rule, lineno)) 00667 errs++; 00668 00669 fclose(f); 00670 00671 #ifdef DEBUG_APPRENTICE 00672 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 00673 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; 00674 #endif 00675 00676 #ifdef MIME_MAGIC_DEBUG_TABLE 00677 test_table(); 00678 #endif 00679 00680 return (errs ? -1 : 0); 00681 } 00682 00683 int KMimeMagic::buff_apprentice(char *buff) 00684 { 00685 char line[BUFSIZ + 2]; 00686 int errs = 0; 00687 int lineno = 1; 00688 char *start = buff; 00689 char *end; 00690 int count = 0; 00691 int rule = 0; 00692 int len = strlen(buff) + 1; 00693 00694 /* parse it */ 00695 do { 00696 count = (len > BUFSIZ-1)?BUFSIZ-1:len; 00697 strncpy(line, start, count); 00698 line[count] = '\0'; 00699 if ((end = strchr(line, '\n'))) { 00700 *(++end) = '\0'; 00701 count = strlen(line); 00702 } else 00703 strcat(line, "\n"); 00704 start += count; 00705 len -= count; 00706 if (parse_line(line, &rule, lineno)) 00707 errs++; 00708 lineno++; 00709 } while (len > 0); 00710 00711 #ifdef DEBUG_APPRENTICE 00712 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 00713 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; 00714 #endif 00715 00716 #ifdef MIME_MAGIC_DEBUG_TABLE 00717 test_table(); 00718 #endif 00719 00720 return (errs ? -1 : 0); 00721 } 00722 00723 /* 00724 * extend the sign bit if the comparison is to be signed 00725 */ 00726 static unsigned long 00727 signextend(struct magic *m, unsigned long v) 00728 { 00729 if (!(m->flag & UNSIGNED)) 00730 switch (m->type) { 00731 /* 00732 * Do not remove the casts below. They are vital. 00733 * When later compared with the data, the sign 00734 * extension must have happened. 00735 */ 00736 case BYTE: 00737 v = (char) v; 00738 break; 00739 case SHORT: 00740 case BESHORT: 00741 case LESHORT: 00742 v = (short) v; 00743 break; 00744 case DATE: 00745 case BEDATE: 00746 case LEDATE: 00747 case LONG: 00748 case BELONG: 00749 case LELONG: 00750 v = (long) v; 00751 break; 00752 case STRING: 00753 break; 00754 default: 00755 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl; 00756 return 998; //good value 00757 } 00758 return v; 00759 } 00760 00761 /* 00762 * parse one line from magic file, put into magic[index++] if valid 00763 */ 00764 int KMimeMagic::parse(char *l, int 00765 #ifdef DEBUG_LINENUMBERS 00766 lineno 00767 #endif 00768 ) 00769 { 00770 int i = 0; 00771 struct magic *m; 00772 char *t, 00773 *s; 00774 /* allocate magic structure entry */ 00775 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) { 00776 kdError(7018) << "parse: Out of memory." << endl; 00777 return -1; 00778 } 00779 /* append to linked list */ 00780 m->next = NULL; 00781 if (!conf->magic || !conf->last) { 00782 conf->magic = conf->last = m; 00783 } else { 00784 conf->last->next = m; 00785 conf->last = m; 00786 } 00787 00788 /* set values in magic structure */ 00789 m->flag = 0; 00790 m->cont_level = 0; 00791 #ifdef DEBUG_LINENUMBERS 00792 m->lineno = lineno; 00793 #endif 00794 00795 while (*l == '>') { 00796 ++l; /* step over */ 00797 m->cont_level++; 00798 } 00799 00800 if (m->cont_level != 0 && *l == '(') { 00801 ++l; /* step over */ 00802 m->flag |= INDIR; 00803 } 00804 /* get offset, then skip over it */ 00805 m->offset = (int) strtol(l, &t, 0); 00806 if (l == t) { 00807 kdError(7018) << "parse: offset " << l << " invalid" << endl; 00808 } 00809 l = t; 00810 00811 if (m->flag & INDIR) { 00812 m->in.type = LONG; 00813 m->in.offset = 0; 00814 /* 00815 * read [.lbs][+-]nnnnn) 00816 */ 00817 if (*l == '.') { 00818 switch (*++l) { 00819 case 'l': 00820 m->in.type = LONG; 00821 break; 00822 case 's': 00823 m->in.type = SHORT; 00824 break; 00825 case 'b': 00826 m->in.type = BYTE; 00827 break; 00828 default: 00829 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl; 00830 break; 00831 } 00832 l++; 00833 } 00834 s = l; 00835 if (*l == '+' || *l == '-') 00836 l++; 00837 if (isdigit((unsigned char) *l)) { 00838 m->in.offset = strtol(l, &t, 0); 00839 if (*s == '-') 00840 m->in.offset = -m->in.offset; 00841 } else 00842 t = l; 00843 if (*t++ != ')') { 00844 kdError(7018) << "parse: missing ')' in indirect offset" << endl; 00845 } 00846 l = t; 00847 } 00848 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l)) 00849 ++l; 00850 EATAB; 00851 00852 #define NBYTE 4 00853 #define NSHORT 5 00854 #define NLONG 4 00855 #define NSTRING 6 00856 #define NDATE 4 00857 #define NBESHORT 7 00858 #define NBELONG 6 00859 #define NBEDATE 6 00860 #define NLESHORT 7 00861 #define NLELONG 6 00862 #define NLEDATE 6 00863 00864 if (*l == 'u') { 00865 ++l; 00866 m->flag |= UNSIGNED; 00867 } 00868 /* get type, skip it */ 00869 if (strncmp(l, "byte", NBYTE) == 0) { 00870 m->type = BYTE; 00871 l += NBYTE; 00872 } else if (strncmp(l, "short", NSHORT) == 0) { 00873 m->type = SHORT; 00874 l += NSHORT; 00875 } else if (strncmp(l, "long", NLONG) == 0) { 00876 m->type = LONG; 00877 l += NLONG; 00878 } else if (strncmp(l, "string", NSTRING) == 0) { 00879 m->type = STRING; 00880 l += NSTRING; 00881 } else if (strncmp(l, "date", NDATE) == 0) { 00882 m->type = DATE; 00883 l += NDATE; 00884 } else if (strncmp(l, "beshort", NBESHORT) == 0) { 00885 m->type = BESHORT; 00886 l += NBESHORT; 00887 } else if (strncmp(l, "belong", NBELONG) == 0) { 00888 m->type = BELONG; 00889 l += NBELONG; 00890 } else if (strncmp(l, "bedate", NBEDATE) == 0) { 00891 m->type = BEDATE; 00892 l += NBEDATE; 00893 } else if (strncmp(l, "leshort", NLESHORT) == 0) { 00894 m->type = LESHORT; 00895 l += NLESHORT; 00896 } else if (strncmp(l, "lelong", NLELONG) == 0) { 00897 m->type = LELONG; 00898 l += NLELONG; 00899 } else if (strncmp(l, "ledate", NLEDATE) == 0) { 00900 m->type = LEDATE; 00901 l += NLEDATE; 00902 } else { 00903 kdError(7018) << "parse: type " << l << " invalid" << endl; 00904 return -1; 00905 } 00906 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 00907 if (*l == '&') { 00908 ++l; 00909 m->mask = signextend(m, strtol(l, &l, 0)); 00910 } else 00911 m->mask = (unsigned long) ~0L; 00912 EATAB; 00913 00914 switch (*l) { 00915 case '>': 00916 case '<': 00917 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 00918 case '&': 00919 case '^': 00920 case '=': 00921 m->reln = *l; 00922 ++l; 00923 break; 00924 case '!': 00925 if (m->type != STRING) { 00926 m->reln = *l; 00927 ++l; 00928 break; 00929 } 00930 /* FALL THROUGH */ 00931 default: 00932 if (*l == 'x' && isascii((unsigned char) l[1]) && 00933 isspace((unsigned char) l[1])) { 00934 m->reln = *l; 00935 ++l; 00936 goto GetDesc; /* Bill The Cat */ 00937 } 00938 m->reln = '='; 00939 break; 00940 } 00941 EATAB; 00942 00943 if (getvalue(m, &l)) 00944 return -1; 00945 /* 00946 * now get last part - the description 00947 */ 00948 GetDesc: 00949 EATAB; 00950 if (l[0] == '\b') { 00951 ++l; 00952 m->nospflag = 1; 00953 } else if ((l[0] == '\\') && (l[1] == 'b')) { 00954 ++l; 00955 ++l; 00956 m->nospflag = 1; 00957 } else 00958 m->nospflag = 0; 00959 // Copy description - until EOL or '#' (for comments) 00960 while (*l != '\0' && *l != '#' && i < MAXDESC-1) 00961 m->desc[i++] = *l++; 00962 m->desc[i] = '\0'; 00963 // Remove trailing spaces 00964 while (--i>0 && isspace( m->desc[i] )) 00965 m->desc[i] = '\0'; 00966 00967 // old code 00968 //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ; 00969 00970 #ifdef DEBUG_APPRENTICE 00971 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl; 00972 #endif 00973 return 0; 00974 } 00975 00976 /* 00977 * Read a numeric value from a pointer, into the value union of a magic 00978 * pointer, according to the magic type. Update the string pointer to point 00979 * just after the number read. Return 0 for success, non-zero for failure. 00980 */ 00981 static int 00982 getvalue(struct magic *m, char **p) 00983 { 00984 int slen; 00985 00986 if (m->type == STRING) { 00987 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); 00988 m->vallen = slen; 00989 } else if (m->reln != 'x') 00990 m->value.l = signextend(m, strtol(*p, p, 0)); 00991 return 0; 00992 } 00993 00994 /* 00995 * Convert a string containing C character escapes. Stop at an unescaped 00996 * space or tab. Copy the converted version to "p", returning its length in 00997 * *slen. Return updated scan pointer as function result. 00998 */ 00999 static char * 01000 getstr(register char *s, register char *p, int plen, int *slen) 01001 { 01002 char *origs = s, 01003 *origp = p; 01004 char *pmax = p + plen - 1; 01005 register int c; 01006 register int val; 01007 01008 while ((c = *s++) != '\0') { 01009 if (isspace((unsigned char) c)) 01010 break; 01011 if (p >= pmax) { 01012 kdError(7018) << "String too long: " << origs << endl; 01013 break; 01014 } 01015 if (c == '\\') { 01016 switch (c = *s++) { 01017 01018 case '\0': 01019 goto out; 01020 01021 default: 01022 *p++ = (char) c; 01023 break; 01024 01025 case 'n': 01026 *p++ = '\n'; 01027 break; 01028 01029 case 'r': 01030 *p++ = '\r'; 01031 break; 01032 01033 case 'b': 01034 *p++ = '\b'; 01035 break; 01036 01037 case 't': 01038 *p++ = '\t'; 01039 break; 01040 01041 case 'f': 01042 *p++ = '\f'; 01043 break; 01044 01045 case 'v': 01046 *p++ = '\v'; 01047 break; 01048 01049 /* \ and up to 3 octal digits */ 01050 case '0': 01051 case '1': 01052 case '2': 01053 case '3': 01054 case '4': 01055 case '5': 01056 case '6': 01057 case '7': 01058 val = c - '0'; 01059 c = *s++; /* try for 2 */ 01060 if (c >= '0' && c <= '7') { 01061 val = (val << 3) | (c - '0'); 01062 c = *s++; /* try for 3 */ 01063 if (c >= '0' && c <= '7') 01064 val = (val << 3) | (c - '0'); 01065 else 01066 --s; 01067 } else 01068 --s; 01069 *p++ = (char) val; 01070 break; 01071 01072 /* \x and up to 3 hex digits */ 01073 case 'x': 01074 val = 'x'; /* Default if no digits */ 01075 c = hextoint(*s++); /* Get next char */ 01076 if (c >= 0) { 01077 val = c; 01078 c = hextoint(*s++); 01079 if (c >= 0) { 01080 val = (val << 4) + c; 01081 c = hextoint(*s++); 01082 if (c >= 0) { 01083 val = (val << 4) + c; 01084 } else 01085 --s; 01086 } else 01087 --s; 01088 } else 01089 --s; 01090 *p++ = (char) val; 01091 break; 01092 } 01093 } else 01094 *p++ = (char) c; 01095 } 01096 out: 01097 *p = '\0'; 01098 *slen = p - origp; 01099 //for ( char* foo = origp; foo < p ; ++foo ) 01100 // kdDebug(7018) << " " << *foo << endl; 01101 return s; 01102 } 01103 01104 01105 /* Single hex char to int; -1 if not a hex char. */ 01106 static int 01107 hextoint(int c) 01108 { 01109 if (!isascii((unsigned char) c)) 01110 return -1; 01111 if (isdigit((unsigned char) c)) 01112 return c - '0'; 01113 if ((c >= 'a') && (c <= 'f')) 01114 return c + 10 - 'a'; 01115 if ((c >= 'A') && (c <= 'F')) 01116 return c + 10 - 'A'; 01117 return -1; 01118 } 01119 01120 /* 01121 * Convert the byte order of the data we are looking at 01122 */ 01123 static int 01124 mconvert(union VALUETYPE *p, struct magic *m) 01125 { 01126 switch (m->type) { 01127 case BYTE: 01128 return 1; 01129 case STRING: 01130 /* Null terminate */ 01131 p->s[sizeof(p->s) - 1] = '\0'; 01132 return 1; 01133 #ifndef WORDS_BIGENDIAN 01134 case SHORT: 01135 #endif 01136 case BESHORT: 01137 p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); 01138 return 1; 01139 #ifndef WORDS_BIGENDIAN 01140 case LONG: 01141 case DATE: 01142 #endif 01143 case BELONG: 01144 case BEDATE: 01145 p->l = (long) 01146 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); 01147 return 1; 01148 #ifdef WORDS_BIGENDIAN 01149 case SHORT: 01150 #endif 01151 case LESHORT: 01152 p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); 01153 return 1; 01154 #ifdef WORDS_BIGENDIAN 01155 case LONG: 01156 case DATE: 01157 #endif 01158 case LELONG: 01159 case LEDATE: 01160 p->l = (long) 01161 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); 01162 return 1; 01163 default: 01164 kdError(7018) << "mconvert: invalid type " << m->type << endl; 01165 return 0; 01166 } 01167 } 01168 01169 01170 static int 01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m, 01172 int nbytes) 01173 { 01174 long offset = m->offset; 01175 switch ( m->type ) 01176 { 01177 case BYTE: 01178 if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1 01179 return 0; 01180 break; 01181 case SHORT: 01182 case BESHORT: 01183 case LESHORT: 01184 if ( offset + 2 > nbytes-1 ) 01185 return 0; 01186 break; 01187 case LONG: 01188 case BELONG: 01189 case LELONG: 01190 case DATE: 01191 case BEDATE: 01192 case LEDATE: 01193 if ( offset + 4 > nbytes-1 ) 01194 return 0; 01195 break; 01196 case STRING: 01197 break; 01198 } 01199 01200 // The file length might be < sizeof(union VALUETYPE) (David) 01201 // -> pad with zeros (the 'file' command does it this way) 01202 // Thanks to Stan Covington <stan@calderasystems.com> for detailed report 01203 if (offset + (int)sizeof(union VALUETYPE) > nbytes) 01204 { 01205 int have = nbytes - offset; 01206 memset(p, 0, sizeof(union VALUETYPE)); 01207 if (have > 0) 01208 memcpy(p, s + offset, have); 01209 } else 01210 memcpy(p, s + offset, sizeof(union VALUETYPE)); 01211 01212 if (!mconvert(p, m)) 01213 return 0; 01214 01215 if (m->flag & INDIR) { 01216 01217 switch (m->in.type) { 01218 case BYTE: 01219 offset = p->b + m->in.offset; 01220 break; 01221 case SHORT: 01222 offset = p->h + m->in.offset; 01223 break; 01224 case LONG: 01225 offset = p->l + m->in.offset; 01226 break; 01227 } 01228 01229 if (offset + (int)sizeof(union VALUETYPE) > nbytes) 01230 return 0; 01231 01232 memcpy(p, s + offset, sizeof(union VALUETYPE)); 01233 01234 if (!mconvert(p, m)) 01235 return 0; 01236 } 01237 return 1; 01238 } 01239 01240 static int 01241 mcheck(union VALUETYPE *p, struct magic *m) 01242 { 01243 register unsigned long l = m->value.l; 01244 register unsigned long v; 01245 int matched; 01246 01247 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { 01248 kdError(7018) << "BOINK" << endl; 01249 return 1; 01250 } 01251 switch (m->type) { 01252 case BYTE: 01253 v = p->b; 01254 break; 01255 01256 case SHORT: 01257 case BESHORT: 01258 case LESHORT: 01259 v = p->h; 01260 break; 01261 01262 case LONG: 01263 case BELONG: 01264 case LELONG: 01265 case DATE: 01266 case BEDATE: 01267 case LEDATE: 01268 v = p->l; 01269 break; 01270 01271 case STRING: 01272 l = 0; 01273 /* 01274 * What we want here is: v = strncmp(m->value.s, p->s, 01275 * m->vallen); but ignoring any nulls. bcmp doesn't give 01276 * -/+/0 and isn't universally available anyway. 01277 */ 01278 v = 0; 01279 { 01280 register unsigned char *a = (unsigned char *) m->value.s; 01281 register unsigned char *b = (unsigned char *) p->s; 01282 register int len = m->vallen; 01283 Q_ASSERT(len); 01284 01285 while (--len >= 0) 01286 if ((v = *b++ - *a++) != 0) 01287 break; 01288 } 01289 break; 01290 default: 01291 kdError(7018) << "mcheck: invalid type " << m->type << endl; 01292 return 0; /* NOTREACHED */ 01293 } 01294 #if 0 01295 qDebug("Before signextend %08x", v); 01296 #endif 01297 v = signextend(m, v) & m->mask; 01298 #if 0 01299 qDebug("After signextend %08x", v); 01300 #endif 01301 01302 switch (m->reln) { 01303 case 'x': 01304 matched = 1; 01305 break; 01306 01307 case '!': 01308 matched = v != l; 01309 break; 01310 01311 case '=': 01312 matched = v == l; 01313 break; 01314 01315 case '>': 01316 if (m->flag & UNSIGNED) 01317 matched = v > l; 01318 else 01319 matched = (long) v > (long) l; 01320 break; 01321 01322 case '<': 01323 if (m->flag & UNSIGNED) 01324 matched = v < l; 01325 else 01326 matched = (long) v < (long) l; 01327 break; 01328 01329 case '&': 01330 matched = (v & l) == l; 01331 break; 01332 01333 case '^': 01334 matched = (v & l) != l; 01335 break; 01336 01337 default: 01338 matched = 0; 01339 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl; 01340 break; /* NOTREACHED */ 01341 } 01342 01343 return matched; 01344 } 01345 01346 /* 01347 * magic_process - process input file fn. Opens the file and reads a 01348 * fixed-size buffer to begin processing the contents. 01349 */ 01350 01351 void process(struct config_rec* conf, const QString & fn) 01352 { 01353 int fd = 0; 01354 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 01355 KDE_struct_stat sb; 01356 int nbytes = 0; /* number of bytes read from a datafile */ 01357 int tagbytes = 0; /* size of prefixed tag */ 01358 QCString fileName = QFile::encodeName( fn ); 01359 01360 /* 01361 * first try judging the file based on its filesystem status 01362 */ 01363 if (fsmagic(conf, fileName, &sb) != 0) { 01364 //resultBuf += "\n"; 01365 return; 01366 } 01367 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) { 01368 /* We can't open it, but we were able to stat it. */ 01369 /* 01370 * if (sb.st_mode & 0002) addResult("writable, "); 01371 * if (sb.st_mode & 0111) addResult("executable, "); 01372 */ 01373 //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl; 01374 conf->resultBuf = MIME_BINARY_UNREADABLE; 01375 return; 01376 } 01377 /* 01378 * try looking at the first HOWMANY bytes 01379 */ 01380 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) { 01381 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl; 01382 conf->resultBuf = MIME_BINARY_UNREADABLE; 01383 return; 01384 } 01385 if ((tagbytes = tagmagic(buf, nbytes))) { 01386 // Read buffer at new position 01387 lseek(fd, tagbytes, SEEK_SET); 01388 nbytes = read(fd, (char*)buf, HOWMANY); 01389 if (nbytes < 0) { 01390 conf->resultBuf = MIME_BINARY_UNREADABLE; 01391 return; 01392 } 01393 } 01394 if (nbytes == 0) { 01395 conf->resultBuf = MIME_BINARY_ZEROSIZE; 01396 } else { 01397 buf[nbytes++] = '\0'; /* null-terminate it */ 01398 tryit(conf, buf, nbytes); 01399 } 01400 01401 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) 01402 { 01403 /* 01404 * Try to restore access, modification times if read it. 01405 * This changes the "change" time (ctime), but we can't do anything 01406 * about that. 01407 */ 01408 struct utimbuf utbuf; 01409 utbuf.actime = sb.st_atime; 01410 utbuf.modtime = sb.st_mtime; 01411 (void) utime(fileName, &utbuf); 01412 } 01413 (void) close(fd); 01414 } 01415 01416 01417 static void tryit(struct config_rec* conf, unsigned char *buf, int nb) 01418 { 01419 /* try tests in /etc/magic (or surrogate magic file) */ 01420 if (match(conf, buf, nb)) 01421 return; 01422 01423 /* try known keywords, check for ascii-ness too. */ 01424 if (ascmagic(conf, buf, nb) == 1) 01425 return; 01426 01427 /* see if it's plain text */ 01428 if (textmagic(conf, buf, nb)) 01429 return; 01430 01431 /* abandon hope, all ye who remain here */ 01432 conf->resultBuf = MIME_BINARY_UNKNOWN; 01433 conf->accuracy = 0; 01434 } 01435 01436 static int 01437 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb) 01438 { 01439 int ret = 0; 01440 01441 /* 01442 * Fstat is cheaper but fails for files you don't have read perms on. 01443 * On 4.2BSD and similar systems, use lstat() to identify symlinks. 01444 */ 01445 ret = KDE_lstat(fn, sb); /* don't merge into if; see "ret =" above */ 01446 01447 if (ret) { 01448 return 1; 01449 01450 } 01451 /* 01452 * if (sb->st_mode & S_ISUID) resultBuf += "setuid "; 01453 * if (sb->st_mode & S_ISGID) resultBuf += "setgid "; 01454 * if (sb->st_mode & S_ISVTX) resultBuf += "sticky "; 01455 */ 01456 01457 switch (sb->st_mode & S_IFMT) { 01458 case S_IFDIR: 01459 conf->resultBuf = MIME_INODE_DIR; 01460 return 1; 01461 case S_IFCHR: 01462 conf->resultBuf = MIME_INODE_CDEV; 01463 return 1; 01464 case S_IFBLK: 01465 conf->resultBuf = MIME_INODE_BDEV; 01466 return 1; 01467 /* TODO add code to handle V7 MUX and Blit MUX files */ 01468 #ifdef S_IFIFO 01469 case S_IFIFO: 01470 conf->resultBuf = MIME_INODE_FIFO; 01471 return 1; 01472 #endif 01473 #ifdef S_IFLNK 01474 case S_IFLNK: 01475 { 01476 char buf[BUFSIZ + BUFSIZ + 4]; 01477 register int nch; 01478 KDE_struct_stat tstatbuf; 01479 01480 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) { 01481 conf->resultBuf = MIME_INODE_LINK; 01482 //conf->resultBuf += "\nunreadable"; 01483 return 1; 01484 } 01485 buf[nch] = '\0'; /* readlink(2) forgets this */ 01486 /* If broken symlink, say so and quit early. */ 01487 if (*buf == '/') { 01488 if (KDE_stat(buf, &tstatbuf) < 0) { 01489 conf->resultBuf = MIME_INODE_LINK; 01490 //conf->resultBuf += "\nbroken"; 01491 return 1; 01492 } 01493 } else { 01494 char *tmp; 01495 char buf2[BUFSIZ + BUFSIZ + 4]; 01496 01497 strncpy(buf2, fn, BUFSIZ); 01498 buf2[BUFSIZ] = 0; 01499 01500 if ((tmp = strrchr(buf2, '/')) == NULL) { 01501 tmp = buf; /* in current dir */ 01502 } else { 01503 /* dir part plus (rel.) link */ 01504 *++tmp = '\0'; 01505 strcat(buf2, buf); 01506 tmp = buf2; 01507 } 01508 if (KDE_stat(tmp, &tstatbuf) < 0) { 01509 conf->resultBuf = MIME_INODE_LINK; 01510 //conf->resultBuf += "\nbroken"; 01511 return 1; 01512 } else 01513 strcpy(buf, tmp); 01514 } 01515 if (conf->followLinks) 01516 process( conf, QFile::decodeName( buf ) ); 01517 else 01518 conf->resultBuf = MIME_INODE_LINK; 01519 return 1; 01520 } 01521 return 1; 01522 #endif 01523 #ifdef S_IFSOCK 01524 #ifndef __COHERENT__ 01525 case S_IFSOCK: 01526 conf->resultBuf = MIME_INODE_SOCK; 01527 return 1; 01528 #endif 01529 #endif 01530 case S_IFREG: 01531 break; 01532 default: 01533 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl; 01534 /* NOTREACHED */ 01535 } 01536 01537 /* 01538 * regular file, check next possibility 01539 */ 01540 if (sb->st_size == 0) { 01541 conf->resultBuf = MIME_BINARY_ZEROSIZE; 01542 return 1; 01543 } 01544 return 0; 01545 } 01546 01547 /* 01548 * Go through the whole list, stopping if you find a match. Process all the 01549 * continuations of that match before returning. 01550 * 01551 * We support multi-level continuations: 01552 * 01553 * At any time when processing a successful top-level match, there is a current 01554 * continuation level; it represents the level of the last successfully 01555 * matched continuation. 01556 * 01557 * Continuations above that level are skipped as, if we see one, it means that 01558 * the continuation that controls them - i.e, the lower-level continuation 01559 * preceding them - failed to match. 01560 * 01561 * Continuations below that level are processed as, if we see one, it means 01562 * we've finished processing or skipping higher-level continuations under the 01563 * control of a successful or unsuccessful lower-level continuation, and are 01564 * now seeing the next lower-level continuation and should process it. The 01565 * current continuation level reverts to the level of the one we're seeing. 01566 * 01567 * Continuations at the current level are processed as, if we see one, there's 01568 * no lower-level continuation that may have failed. 01569 * 01570 * If a continuation matches, we bump the current continuation level so that 01571 * higher-level continuations are processed. 01572 */ 01573 static int 01574 match(struct config_rec* conf, unsigned char *s, int nbytes) 01575 { 01576 int cont_level = 0; 01577 union VALUETYPE p; 01578 struct magic *m; 01579 01580 #ifdef DEBUG_MIMEMAGIC 01581 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; 01582 for (m = conf->magic; m; m = m->next) { 01583 if (isprint((((unsigned long) m) >> 24) & 255) && 01584 isprint((((unsigned long) m) >> 16) & 255) && 01585 isprint((((unsigned long) m) >> 8) & 255) && 01586 isprint(((unsigned long) m) & 255)) { 01587 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl; 01588 break; 01589 } 01590 } 01591 #endif 01592 01593 for (m = conf->magic; m; m = m->next) { 01594 #ifdef DEBUG_MIMEMAGIC 01595 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl; 01596 #endif 01597 memset(&p, 0, sizeof(union VALUETYPE)); 01598 01599 /* check if main entry matches */ 01600 if (!mget(&p, s, m, nbytes) || 01601 !mcheck(&p, m)) { 01602 struct magic *m_cont; 01603 01604 /* 01605 * main entry didn't match, flush its continuations 01606 */ 01607 if (!m->next || (m->next->cont_level == 0)) { 01608 continue; 01609 } 01610 m_cont = m->next; 01611 while (m_cont && (m_cont->cont_level != 0)) { 01612 #ifdef DEBUG_MIMEMAGIC 01613 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl; 01614 #endif 01615 /* 01616 * this trick allows us to keep *m in sync 01617 * when the continue advances the pointer 01618 */ 01619 m = m_cont; 01620 m_cont = m_cont->next; 01621 } 01622 continue; 01623 } 01624 /* if we get here, the main entry rule was a match */ 01625 /* this will be the last run through the loop */ 01626 #ifdef DEBUG_MIMEMAGIC 01627 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; 01628 #endif 01629 01630 /* remember the match */ 01631 conf->resultBuf = m->desc; 01632 01633 cont_level++; 01634 /* 01635 * while (m && m->next && m->next->cont_level != 0 && ( m = 01636 * m->next )) 01637 */ 01638 m = m->next; 01639 while (m && (m->cont_level != 0)) { 01640 #ifdef DEBUG_MIMEMAGIC 01641 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; 01642 #endif 01643 if (cont_level >= m->cont_level) { 01644 if (cont_level > m->cont_level) { 01645 /* 01646 * We're at the end of the level 01647 * "cont_level" continuations. 01648 */ 01649 cont_level = m->cont_level; 01650 } 01651 if (mget(&p, s, m, nbytes) && 01652 mcheck(&p, m)) { 01653 /* 01654 * This continuation matched. Print 01655 * its message, with a blank before 01656 * it if the previous item printed 01657 * and this item isn't empty. 01658 */ 01659 #ifdef DEBUG_MIMEMAGIC 01660 kdDebug(7018) << "continuation matched" << endl; 01661 #endif 01662 conf->resultBuf = m->desc; 01663 cont_level++; 01664 } 01665 } 01666 /* move to next continuation record */ 01667 m = m->next; 01668 } 01669 // KDE-specific: need an actual mimetype for a real match 01670 // If we only matched a rule with continuations but no mimetype, it's not a match 01671 if ( !conf->resultBuf.isEmpty() ) 01672 { 01673 #ifdef DEBUG_MIMEMAGIC 01674 kdDebug(7018) << "match: matched" << endl; 01675 #endif 01676 return 1; /* all through */ 01677 } 01678 } 01679 #ifdef DEBUG_MIMEMAGIC 01680 kdDebug(7018) << "match: failed" << endl; 01681 #endif 01682 return 0; /* no match at all */ 01683 } 01684 01685 // Try to parse prefixed tags before matching on content 01686 // Sofar only ID3v2 tags (<=.4) are handled 01687 static int tagmagic(unsigned char *buf, int nbytes) 01688 { 01689 if(nbytes<40) return 0; 01690 if(buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3') { 01691 int size = 10; 01692 // Sanity (known version, no unknown flags) 01693 if(buf[3] > 4) return 0; 01694 if(buf[5] & 0x0F) return 0; 01695 // Tag has v4 footer 01696 if(buf[5] & 0x10) size += 10; 01697 // Calculated syncsafe size 01698 size += buf[9]; 01699 size += buf[8] << 7; 01700 size += buf[7] << 14; 01701 size += buf[6] << 21; 01702 return size; 01703 } 01704 return 0; 01705 } 01706 01707 struct Token { 01708 char *data; 01709 int length; 01710 }; 01711 01712 struct Tokenizer 01713 { 01714 Tokenizer(char* buf, int nbytes) { 01715 data = buf; 01716 length = nbytes; 01717 pos = 0; 01718 } 01719 bool isNewLine() { 01720 return newline; 01721 } 01722 Token* nextToken() { 01723 if (pos == 0) 01724 newline = true; 01725 else 01726 newline = false; 01727 token.data = data+pos; 01728 token.length = 0; 01729 while(pos<length) { 01730 switch (data[pos]) { 01731 case '\n': 01732 newline = true; 01733 case '\0': 01734 case '\t': 01735 case ' ': 01736 case '\r': 01737 case '\f': 01738 case ',': 01739 case ';': 01740 case '>': 01741 if (token.length == 0) token.data++; 01742 else 01743 return &token; 01744 break; 01745 default: 01746 token.length++; 01747 } 01748 pos++; 01749 } 01750 return &token; 01751 } 01752 01753 private: 01754 Token token; 01755 char* data; 01756 int length; 01757 int pos; 01758 bool newline; 01759 }; 01760 01761 01762 /* an optimization over plain strcmp() */ 01763 //#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) 01764 static inline bool STREQ(const Token *token, const char *b) { 01765 const char *a = token->data; 01766 int len = token->length; 01767 if (a == b) return true; 01768 while(*a && *b && len > 0) { 01769 if (*a != *b) return false; 01770 a++; b++; len--; 01771 } 01772 return (len == 0 && *b == 0); 01773 } 01774 01775 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes) 01776 { 01777 int i; 01778 double pct, maxpct, pctsum; 01779 double pcts[NTYPES]; 01780 int mostaccurate, tokencount; 01781 int typeset, jonly, conly, jconly, objconly, cpponly; 01782 int has_escapes = 0; 01783 //unsigned char *s; 01784 //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ 01785 01786 /* these are easy, do them first */ 01787 conf->accuracy = 70; 01788 01789 /* 01790 * for troff, look for . + letter + letter or .\"; this must be done 01791 * to disambiguate tar archives' ./file and other trash from real 01792 * troff input. 01793 */ 01794 if (*buf == '.') { 01795 unsigned char *tp = buf + 1; 01796 01797 while (isascii(*tp) && isspace(*tp)) 01798 ++tp; /* skip leading whitespace */ 01799 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') && 01800 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) { 01801 conf->resultBuf = MIME_APPL_TROFF; 01802 return 1; 01803 } 01804 } 01805 if ((*buf == 'c' || *buf == 'C') && 01806 isascii(*(buf + 1)) && isspace(*(buf + 1))) { 01807 /* Fortran */ 01808 conf->resultBuf = MIME_TEXT_FORTRAN; 01809 return 1; 01810 } 01811 assert(nbytes-1 < HOWMANY + 1); 01812 /* look for tokens - this is expensive! */ 01813 has_escapes = (memchr(buf, '\033', nbytes) != NULL); 01814 Tokenizer tokenizer((char*)buf, nbytes); 01815 const Token* token; 01816 bool linecomment = false, blockcomment = false; 01817 const struct names *p; 01818 int typecount[NTYPES]; 01819 /* 01820 * Fritz: 01821 * Try a little harder on C/C++/Java. 01822 */ 01823 memset(&typecount, 0, sizeof(typecount)); 01824 typeset = 0; 01825 jonly = 0; 01826 conly = 0; 01827 jconly = 0; 01828 objconly = 0; 01829 cpponly = 0; 01830 tokencount = 0; 01831 bool foundClass = false; // mandatory for java 01832 // first collect all possible types and count matches 01833 // we stop at '>' too, because of "<title>blah</title>" on HTML pages 01834 while ((token = tokenizer.nextToken())->length > 0) { 01835 #ifdef DEBUG_MIMEMAGIC 01836 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl; 01837 #endif 01838 if (linecomment && tokenizer.isNewLine()) 01839 linecomment = false; 01840 if (blockcomment && STREQ(token, "*/")) { 01841 blockcomment = false; 01842 continue; 01843 } 01844 for (p = names; p->name ; p++) { 01845 if (STREQ(token, p->name)) { 01846 #ifdef DEBUG_MIMEMAGIC 01847 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl; 01848 #endif 01849 tokencount++; 01850 typeset |= p->type; 01851 if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) { 01852 if (linecomment || blockcomment) { 01853 continue; 01854 } 01855 else { 01856 switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) 01857 { 01858 case L_JAVA: 01859 jonly++; 01860 break; 01861 case L_OBJC: 01862 objconly++; 01863 break; 01864 case L_CPP: 01865 cpponly++; 01866 break; 01867 case (L_CPP|L_JAVA): 01868 jconly++; 01869 if ( !foundClass && STREQ(token, "class") ) 01870 foundClass = true; 01871 break; 01872 case (L_C|L_CPP): 01873 conly++; 01874 break; 01875 default: 01876 if (STREQ(token, "//")) linecomment = true; 01877 if (STREQ(token, "/*")) blockcomment = true; 01878 } 01879 } 01880 } 01881 for (i = 0; i < (int)NTYPES; i++) { 01882 if ((1 << i) & p->type) typecount[i]++; 01883 } 01884 } 01885 } 01886 } 01887 01888 if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) { 01889 conf->accuracy = 60; 01890 if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) { 01891 #ifdef DEBUG_MIMEMAGIC 01892 kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl; 01893 #endif 01894 if (jonly > 1 && foundClass) { 01895 // At least two java-only tokens have matched, including "class" 01896 conf->resultBuf = QString(types[P_JAVA].type); 01897 return 1; 01898 } 01899 if (jconly > 1) { 01900 // At least two non-C (only C++ or Java) token have matched. 01901 if (typecount[P_JAVA] < typecount[P_CPP]) 01902 conf->resultBuf = QString(types[P_CPP].type); 01903 else 01904 conf->resultBuf = QString(types[P_JAVA].type); 01905 return 1; 01906 } 01907 if (conly + cpponly > 1) { 01908 // Either C or C++. 01909 if (cpponly > 0) 01910 conf->resultBuf = QString(types[P_CPP].type); 01911 else 01912 conf->resultBuf = QString(types[P_C].type); 01913 return 1; 01914 } 01915 if (objconly > 0) { 01916 conf->resultBuf = QString(types[P_OBJC].type); 01917 return 1; 01918 } 01919 } 01920 } 01921 01922 /* Neither C, C++ or Java (or all of them without able to distinguish): 01923 * Simply take the token-class with the highest 01924 * matchcount > 0 01925 */ 01926 mostaccurate = -1; 01927 maxpct = pctsum = 0.0; 01928 for (i = 0; i < (int)NTYPES; i++) { 01929 if (typecount[i] > 1) { // one word is not enough, we need at least two 01930 pct = (double)typecount[i] / (double)types[i].kwords * 01931 (double)types[i].weight; 01932 pcts[i] = pct; 01933 pctsum += pct; 01934 if (pct > maxpct) { 01935 maxpct = pct; 01936 mostaccurate = i; 01937 } 01938 #ifdef DEBUG_MIMEMAGIC 01939 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl; 01940 #endif 01941 } 01942 } 01943 if (mostaccurate >= 0) { 01944 if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java 01945 { 01946 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60); 01947 #ifdef DEBUG_MIMEMAGIC 01948 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl; 01949 #endif 01950 conf->resultBuf = QString(types[mostaccurate].type); 01951 return 1; 01952 } 01953 } 01954 01955 switch (is_tar(buf, nbytes)) { 01956 case 1: 01957 /* V7 tar archive */ 01958 conf->resultBuf = MIME_APPL_TAR; 01959 conf->accuracy = 90; 01960 return 1; 01961 case 2: 01962 /* POSIX tar archive */ 01963 conf->resultBuf = MIME_APPL_TAR; 01964 conf->accuracy = 90; 01965 return 1; 01966 } 01967 01968 for (i = 0; i < nbytes; i++) { 01969 if (!isascii(*(buf + i))) 01970 return 0; /* not all ascii */ 01971 } 01972 01973 /* all else fails, but it is ascii... */ 01974 conf->accuracy = 90; 01975 if (has_escapes) { 01976 /* text with escape sequences */ 01977 /* we leave this open for further differentiation later */ 01978 conf->resultBuf = MIME_TEXT_UNKNOWN; 01979 } else { 01980 /* plain text */ 01981 conf->resultBuf = MIME_TEXT_PLAIN; 01982 } 01983 return 1; 01984 } 01985 01986 /* Maximal length of a line we consider "reasonable". */ 01987 #define TEXT_MAXLINELEN 300 01988 01989 // This code is taken from the "file" command, where it is licensed 01990 // in the "beer-ware license" :-) 01991 // Original author: <joerg@FreeBSD.ORG> 01992 // Simplified by David Faure to avoid the static array char[256]. 01993 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes) 01994 { 01995 int i; 01996 unsigned char *cp; 01997 01998 nbytes--; 01999 02000 /* First, look whether there are "unreasonable" characters. */ 02001 for (i = 0, cp = buf; i < nbytes; i++, cp++) 02002 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F)) 02003 return 0; 02004 02005 /* Now, look whether the file consists of lines of 02006 * "reasonable" length. */ 02007 02008 for (i = 0; i < nbytes;) { 02009 cp = (unsigned char *) memchr(buf, '\n', nbytes - i); 02010 if (cp == NULL) { 02011 /* Don't fail if we hit the end of buffer. */ 02012 if (i + TEXT_MAXLINELEN >= nbytes) 02013 break; 02014 else 02015 return 0; 02016 } 02017 if (cp - buf > TEXT_MAXLINELEN) 02018 return 0; 02019 i += (cp - buf + 1); 02020 buf = cp + 1; 02021 } 02022 conf->resultBuf = MIME_TEXT_PLAIN; 02023 return 1; 02024 } 02025 02026 02027 /* 02028 * is_tar() -- figure out whether file is a tar archive. 02029 * 02030 * Stolen (by author of file utility) from the public domain tar program: Public 02031 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). 02032 * 02033 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 02034 * 1997/06/24 00:41:02 ikluft Exp ikluft $ 02035 * 02036 * Comments changed and some code/comments reformatted for file command by Ian 02037 * Darwin. 02038 */ 02039 02040 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) 02041 02042 /* 02043 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for 02044 * old UNIX tar file, 2 for Unix Std (POSIX) tar file. 02045 */ 02046 02047 static int 02048 is_tar(unsigned char *buf, int nbytes) 02049 { 02050 register union record *header = (union record *) buf; 02051 register int i; 02052 register long sum, 02053 recsum; 02054 register char *p; 02055 02056 if (nbytes < (int)sizeof(union record)) 02057 return 0; 02058 02059 recsum = from_oct(8, header->header.chksum); 02060 02061 sum = 0; 02062 p = header->charptr; 02063 for (i = sizeof(union record); --i >= 0;) { 02064 /* 02065 * We can't use unsigned char here because of old compilers, 02066 * e.g. V7. 02067 */ 02068 sum += 0xFF & *p++; 02069 } 02070 02071 /* Adjust checksum to count the "chksum" field as blanks. */ 02072 for (i = sizeof(header->header.chksum); --i >= 0;) 02073 sum -= 0xFF & header->header.chksum[i]; 02074 sum += ' ' * sizeof header->header.chksum; 02075 02076 if (sum != recsum) 02077 return 0; /* Not a tar archive */ 02078 02079 if (0 == strcmp(header->header.magic, TMAGIC)) 02080 return 2; /* Unix Standard tar archive */ 02081 02082 return 1; /* Old fashioned tar archive */ 02083 } 02084 02085 02086 /* 02087 * Quick and dirty octal conversion. 02088 * 02089 * Result is -1 if the field is invalid (all blank, or nonoctal). 02090 */ 02091 static long 02092 from_oct(int digs, char *where) 02093 { 02094 register long value; 02095 02096 while (isspace(*where)) { /* Skip spaces */ 02097 where++; 02098 if (--digs <= 0) 02099 return -1; /* All blank field */ 02100 } 02101 value = 0; 02102 while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ 02103 value = (value << 3) | (*where++ - '0'); 02104 --digs; 02105 } 02106 02107 if (digs > 0 && *where && !isspace(*where)) 02108 return -1; /* Ended on non-space/nul */ 02109 02110 return value; 02111 } 02112 02113 KMimeMagic::KMimeMagic() 02114 { 02115 // Magic file detection init 02116 QString mimefile = locate( "mime", "magic" ); 02117 init( mimefile ); 02118 // Add snippets from share/config/magic/* 02119 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); 02120 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) 02121 if ( !mergeConfig( *it ) ) 02122 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; 02123 } 02124 02125 KMimeMagic::KMimeMagic(const QString & _configfile) 02126 { 02127 init( _configfile ); 02128 } 02129 02130 void KMimeMagic::init( const QString& _configfile ) 02131 { 02132 int result; 02133 conf = new config_rec; 02134 02135 /* set up the magic list (empty) */ 02136 conf->magic = conf->last = NULL; 02137 magicResult = NULL; 02138 conf->followLinks = false; 02139 02140 conf->utimeConf = 0L; // created on demand 02141 /* on the first time through we read the magic file */ 02142 result = apprentice(_configfile); 02143 if (result == -1) 02144 return; 02145 #ifdef MIME_MAGIC_DEBUG_TABLE 02146 test_table(); 02147 #endif 02148 } 02149 02150 /* 02151 * The destructor. 02152 * Free the magic-table and other resources. 02153 */ 02154 KMimeMagic::~KMimeMagic() 02155 { 02156 if (conf) { 02157 struct magic *p = conf->magic; 02158 struct magic *q; 02159 while (p) { 02160 q = p; 02161 p = p->next; 02162 free(q); 02163 } 02164 delete conf->utimeConf; 02165 delete conf; 02166 } 02167 delete magicResult; 02168 } 02169 02170 bool 02171 KMimeMagic::mergeConfig(const QString & _configfile) 02172 { 02173 kdDebug(7018) << k_funcinfo << _configfile << endl; 02174 int result; 02175 02176 if (_configfile.isEmpty()) 02177 return false; 02178 result = apprentice(_configfile); 02179 if (result == -1) { 02180 return false; 02181 } 02182 #ifdef MIME_MAGIC_DEBUG_TABLE 02183 test_table(); 02184 #endif 02185 return true; 02186 } 02187 02188 bool 02189 KMimeMagic::mergeBufConfig(char * _configbuf) 02190 { 02191 int result; 02192 02193 if (conf) { 02194 result = buff_apprentice(_configbuf); 02195 if (result == -1) 02196 return false; 02197 #ifdef MIME_MAGIC_DEBUG_TABLE 02198 test_table(); 02199 #endif 02200 return true; 02201 } 02202 return false; 02203 } 02204 02205 void 02206 KMimeMagic::setFollowLinks( bool _enable ) 02207 { 02208 conf->followLinks = _enable; 02209 } 02210 02211 KMimeMagicResult * 02212 KMimeMagic::findBufferType(const QByteArray &array) 02213 { 02214 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 02215 02216 conf->resultBuf = QString::null; 02217 if ( !magicResult ) 02218 magicResult = new KMimeMagicResult(); 02219 magicResult->setInvalid(); 02220 conf->accuracy = 100; 02221 02222 int nbytes = array.size(); 02223 02224 if (nbytes > HOWMANY) 02225 nbytes = HOWMANY; 02226 memcpy(buf, array.data(), nbytes); 02227 if (nbytes == 0) { 02228 conf->resultBuf = MIME_BINARY_ZEROSIZE; 02229 } else { 02230 buf[nbytes++] = '\0'; /* null-terminate it */ 02231 tryit(conf, buf, nbytes); 02232 } 02233 /* if we have any results, put them in the request structure */ 02234 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); 02235 magicResult->setAccuracy(conf->accuracy); 02236 return magicResult; 02237 } 02238 02239 static void 02240 refineResult(KMimeMagicResult *r, const QString & _filename) 02241 { 02242 QString tmp = r->mimeType(); 02243 if (tmp.isEmpty()) 02244 return; 02245 if ( tmp == "text/x-c" || tmp == "text/x-objc" ) 02246 { 02247 if ( _filename.right(2) == ".h" ) 02248 tmp += "hdr"; 02249 else 02250 tmp += "src"; 02251 r->setMimeType(tmp); 02252 } 02253 else 02254 if ( tmp == "text/x-c++" ) 02255 { 02256 if ( _filename.endsWith(".h") 02257 || _filename.endsWith(".hh") 02258 || _filename.endsWith(".H") 02259 || !_filename.right(4).contains('.')) 02260 tmp += "hdr"; 02261 else 02262 tmp += "src"; 02263 r->setMimeType(tmp); 02264 } 02265 } 02266 02267 KMimeMagicResult * 02268 KMimeMagic::findBufferFileType( const QByteArray &data, 02269 const QString &fn) 02270 { 02271 KMimeMagicResult * r = findBufferType( data ); 02272 refineResult(r, fn); 02273 return r; 02274 } 02275 02276 /* 02277 * Find the content-type of the given file. 02278 */ 02279 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn) 02280 { 02281 #ifdef DEBUG_MIMEMAGIC 02282 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; 02283 #endif 02284 conf->resultBuf = QString::null; 02285 02286 if ( !magicResult ) 02287 magicResult = new KMimeMagicResult(); 02288 magicResult->setInvalid(); 02289 conf->accuracy = 100; 02290 02291 if ( !conf->utimeConf ) 02292 conf->utimeConf = new KMimeMagicUtimeConf(); 02293 02294 /* process it based on the file contents */ 02295 process(conf, fn ); 02296 02297 /* if we have any results, put them in the request structure */ 02298 //finishResult(); 02299 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); 02300 magicResult->setAccuracy(conf->accuracy); 02301 refineResult(magicResult, fn); 02302 return magicResult; 02303 }
KDE Logo
This file is part of the documentation for kio Library Version 3.4.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Thu Apr 14 00:20:26 2005 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003