00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
#include "kmimemagic.h"
00020
#include <kdebug.h>
00021
#include <kapplication.h>
00022
#include <qfile.h>
00023
#include <ksimpleconfig.h>
00024
#include <kstandarddirs.h>
00025
#include <kstaticdeleter.h>
00026
#include <klargefile.h>
00027
#include <assert.h>
00028
00029
static int fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb);
00030
static void process(
struct config_rec* conf,
const QString &);
00031
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes);
00032
static int tagmagic(
unsigned char *buf,
int nbytes);
00033
static int textmagic(
struct config_rec* conf,
unsigned char *,
int);
00034
00035
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb);
00036
static int match(
struct config_rec* conf,
unsigned char *,
int);
00037
00038
KMimeMagic* KMimeMagic::s_pSelf;
00039
static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040
00041 KMimeMagic*
KMimeMagic::self()
00042 {
00043
if( !s_pSelf )
00044 initStatic();
00045
return s_pSelf;
00046 }
00047
00048
void KMimeMagic::initStatic()
00049 {
00050 s_pSelf = kmimemagicsd.setObject( s_pSelf,
new KMimeMagic() );
00051 s_pSelf->
setFollowLinks(
true );
00052 }
00053
00054
#include <stdio.h>
00055
#include <unistd.h>
00056
#include <stdlib.h>
00057
#include <sys/wait.h>
00058
#include <sys/types.h>
00059
#include <sys/stat.h>
00060
#include <fcntl.h>
00061
#include <errno.h>
00062
#include <ctype.h>
00063
#include <time.h>
00064
#include <utime.h>
00065
#include <stdarg.h>
00066
#include <qregexp.h>
00067
#include <qstring.h>
00068
00069
00070
00071
00072
00073
00074
00075
00076
/* Either debug switch needs the per-rule line numbers kept in struct magic. */
#if defined(DEBUG_MIMEMAGIC) || defined(DEBUG_APPRENTICE)
#define DEBUG_LINENUMBERS
#endif

/* MIME type names stored into the result buffer by the detection code. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"

#define MAXMIMESTRING 256

#define HOWMANY   4000  /* number of bytes read from a file by process() */
#define MAXDESC   50    /* size of struct magic::desc */
#define MAXstring 64    /* size of VALUETYPE::s */
00104
00105
typedef union VALUETYPE {
00106
unsigned char b;
00107
unsigned short h;
00108
unsigned long l;
00109
char s[MAXstring];
00110
unsigned char hs[2];
00111
unsigned char hl[4];
00112 } VALUETYPE;
00113
00114
struct magic {
00115
struct magic *
next;
00116
#ifdef DEBUG_LINENUMBERS
00117
int lineno;
00118
#endif
00119
00120
short flag;
00121
#define INDIR 1
00122
#define UNSIGNED 2
00123
short cont_level;
00124
struct {
00125
char type;
00126
long offset;
00127 } in;
00128
long offset;
00129
unsigned char reln;
00130
char type;
00131
char vallen;
00132
#define BYTE 1
00133
#define SHORT 2
00134
#define LONG 4
00135
#define STRING 5
00136
#define DATE 6
00137
#define BESHORT 7
00138
#define BELONG 8
00139
#define BEDATE 9
00140
#define LESHORT 10
00141
#define LELONG 11
00142
#define LEDATE 12
00143
VALUETYPE value;
00144
unsigned long mask;
00145
char nospflag;
00146
00147
00148
char desc[MAXDESC];
00149 };
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
/* Sizes of a tar record and of the name fields in its header. */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

/* One tar record, viewed either as raw bytes or as a header. */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/* Contents of the header's magic field. */
#define TMAGIC "ustar "
00192
00193
00194
00195
00196
/* Prototypes of the file-local helpers that parse and evaluate magic rules. */
static int is_tar(unsigned char *, int);
static unsigned long signextend(struct magic *m, unsigned long v);
static int getvalue(struct magic *m, char **p);
static int hextoint(int c);
static char *getstr(char *s, char *p, int plen, int *slen);
static int mget(union VALUETYPE *p, unsigned char *s, struct magic *m, int nbytes);
static int mcheck(union VALUETYPE *p, struct magic *m);
static int mconvert(union VALUETYPE *p, struct magic *m);
static long from_oct(int, char *);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
/*
 * Language bits used in the keyword table.  One keyword may carry several
 * bits when it is shared between languages.
 */
#define L_HTML 0x001
#define L_C    0x002
#define L_MAKE 0x004
#define L_PLI  0x008
#define L_MACH 0x010
#define L_PAS  0x020
#define L_JAVA 0x040
#define L_CPP  0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400
#define L_OBJC 0x800

/* Bit position of each language; L_x equals 1 << P_x. */
#define P_HTML 0
#define P_C    1
#define P_MAKE 2
#define P_PLI  3
#define P_MACH 4
#define P_PAS  5
#define P_JAVA 6
#define P_CPP  7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10
#define P_OBJC 11
00245
00246
/* Describes one text type that can be recognised from keywords. */
typedef struct asc_type {
    const char *type;   /* MIME type name */
    int kwords;         /* keyword count associated with the type */
    double weight;      /* weighting factor associated with the type */
} asc_type;
00251
00252
/*
 * Text types recognised from keywords.  The entries appear in the same
 * order as the P_* constants (P_HTML first, P_OBJC last).
 */
static const asc_type types[] = {
    { "text/html",        19, 2   },
    { "text/x-c",         13, 1   },
    { "text/x-makefile",   4, 1.9 },
    { "text/x-pli",        1, 3   },
    { "text/x-assembler",  6, 2.1 },
    { "text/x-pascal",     1, 1   },
    { "text/x-java",      12, 1   },
    { "text/x-c++",       19, 1   },
    { "message/rfc822",    4, 1.9 },
    { "message/news",      3, 2   },
    { "text/x-diff",       4, 2   },
    { "text/x-objc",      10, 1   }
};

/* Number of entries in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00268
00269
static struct names {
00270
const char *
name;
00271
short type;
00272 }
const names[] = {
00273 {
00274
"<html", L_HTML
00275 },
00276 {
00277
"<HTML", L_HTML
00278 },
00279 {
00280
"<head", L_HTML
00281 },
00282 {
00283
"<HEAD", L_HTML
00284 },
00285 {
00286
"<body", L_HTML
00287 },
00288 {
00289
"<BODY", L_HTML
00290 },
00291 {
00292
"<title", L_HTML
00293 },
00294 {
00295
"<TITLE", L_HTML
00296 },
00297 {
00298
"<h1", L_HTML
00299 },
00300 {
00301
"<H1", L_HTML
00302 },
00303 {
00304
"<a", L_HTML
00305 },
00306 {
00307
"<A", L_HTML
00308 },
00309 {
00310
"<img", L_HTML
00311 },
00312 {
00313
"<IMG", L_HTML
00314 },
00315 {
00316
"<!--", L_HTML
00317 },
00318 {
00319
"<!doctype", L_HTML
00320 },
00321 {
00322
"<!DOCTYPE", L_HTML
00323 },
00324 {
00325
"<div", L_HTML
00326 },
00327 {
00328
"<DIV", L_HTML
00329 },
00330 {
00331
"<frame", L_HTML
00332 },
00333 {
00334
"<FRAME", L_HTML
00335 },
00336 {
00337
"<frameset", L_HTML
00338 },
00339 {
00340
"<FRAMESET", L_HTML
00341 },
00342 {
00343
"<script", L_HTML
00344 },
00345 {
00346
"<SCRIPT", L_HTML
00347 },
00348 {
00349
"/*", L_C|L_CPP|L_JAVA|L_OBJC
00350 },
00351 {
00352
"//", L_C|L_CPP|L_JAVA|L_OBJC
00353 },
00354 {
00355
"#include", L_C|L_CPP
00356 },
00357 {
00358
"#ifdef", L_C|L_CPP
00359 },
00360 {
00361
"#ifndef", L_C|L_CPP
00362 },
00363 {
00364
"bool", L_C|L_CPP
00365 },
00366 {
00367
"char", L_C|L_CPP|L_JAVA|L_OBJC
00368 },
00369 {
00370
"int", L_C|L_CPP|L_JAVA|L_OBJC
00371 },
00372 {
00373
"float", L_C|L_CPP|L_JAVA|L_OBJC
00374 },
00375 {
00376
"void", L_C|L_CPP|L_JAVA|L_OBJC
00377 },
00378 {
00379
"extern", L_C|L_CPP
00380 },
00381 {
00382
"struct", L_C|L_CPP
00383 },
00384 {
00385
"union", L_C|L_CPP
00386 },
00387 {
00388
"implements", L_JAVA
00389 },
00390 {
00391
"super", L_JAVA
00392 },
00393 {
00394
"import", L_JAVA
00395 },
00396 {
00397
"class", L_CPP|L_JAVA
00398 },
00399 {
00400
"public", L_CPP|L_JAVA
00401 },
00402 {
00403
"private", L_CPP|L_JAVA
00404 },
00405 {
00406
"explicit", L_CPP
00407 },
00408 {
00409
"virtual", L_CPP
00410 },
00411 {
00412
"namespace", L_CPP
00413 },
00414 {
00415
"#import", L_OBJC
00416 },
00417 {
00418
"@interface", L_OBJC
00419 },
00420 {
00421
"@implementation", L_OBJC
00422 },
00423 {
00424
"@protocol", L_OBJC
00425 },
00426 {
00427
"CFLAGS", L_MAKE
00428 },
00429 {
00430
"LDFLAGS", L_MAKE
00431 },
00432 {
00433
"all:", L_MAKE
00434 },
00435 {
00436
".PHONY:", L_MAKE
00437 },
00438 {
00439
"srcdir", L_MAKE
00440 },
00441 {
00442
"exec_prefix", L_MAKE
00443 },
00444
00445
00446
00447
00448 {
00449
".ascii", L_MACH
00450 },
00451 {
00452
".asciiz", L_MACH
00453 },
00454 {
00455
".byte", L_MACH
00456 },
00457 {
00458
".even", L_MACH
00459 },
00460 {
00461
".globl", L_MACH
00462 },
00463 {
00464
"clr", L_MACH
00465 },
00466 {
00467
"(input", L_PAS
00468 },
00469 {
00470
"dcl", L_PLI
00471 },
00472 {
00473
"Received:", L_MAIL
00474 },
00475
00476
00477
00478 {
00479
"Return-Path:", L_MAIL
00480 },
00481 {
00482
"Cc:", L_MAIL
00483 },
00484 {
00485
"Newsgroups:", L_NEWS
00486 },
00487 {
00488
"Path:", L_NEWS
00489 },
00490 {
00491
"Organization:", L_NEWS
00492 },
00493 {
00494
"---", L_DIFF
00495 },
00496 {
00497
"+++", L_DIFF
00498 },
00499 {
00500
"***", L_DIFF
00501 },
00502 {
00503
"@@", L_DIFF
00504 },
00505 {
00506 NULL, 0
00507 }
00508 };
00509
00520
class KMimeMagicUtimeConf
00521 {
00522
public:
00523 KMimeMagicUtimeConf()
00524 {
00525 tmpDirs << QString::fromLatin1(
"/tmp");
00526
00527
00528
00529
QStringList confDirs =
KGlobal::dirs()->
resourceDirs(
"config" );
00530
if ( !confDirs.isEmpty() )
00531 {
00532
QString globalConf = confDirs.last() +
"kmimemagicrc";
00533
if ( QFile::exists( globalConf ) )
00534 {
00535
KSimpleConfig cfg( globalConf );
00536 cfg.
setGroup(
"Settings" );
00537 tmpDirs = cfg.
readListEntry(
"atimeDirs" );
00538 }
00539
if ( confDirs.count() > 1 )
00540 {
00541
QString localConf = confDirs.first() +
"kmimemagicrc";
00542
if ( QFile::exists( localConf ) )
00543 {
00544
KSimpleConfig cfg( localConf );
00545 cfg.
setGroup(
"Settings" );
00546 tmpDirs += cfg.
readListEntry(
"atimeDirs" );
00547 }
00548 }
00549
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550 {
00551
QString dir = *it;
00552
if ( !dir.isEmpty() && dir[ dir.length()-1 ] !=
'/' )
00553 (*it) +=
'/';
00554 }
00555 }
00556
#if 0
00557
00558
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559
kdDebug(7018) <<
" atimeDir: " << *it <<
endl;
00560
#endif
00561
}
00562
00563
bool restoreAccessTime(
const QString & file )
const
00564
{
00565
QString dir = file.left( file.findRev(
'/' ) );
00566
bool res = tmpDirs.contains( dir );
00567
00568
return res;
00569 }
00570
QStringList tmpDirs;
00571 };
00572
00573
00574
struct config_rec {
00575
bool followLinks;
00576
QString resultBuf;
00577
int accuracy;
00578
00579
struct magic *magic,
00580 *last;
00581 KMimeMagicUtimeConf * utimeConf;
00582 };
00583
00584
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid: walks the rule list and reports the first entry whose address
 * consists of four printable bytes, which suggests that a list pointer was
 * overwritten with text.
 *
 * NOTE(review): this is a free function and therefore cannot see the
 * KMimeMagic member "conf".  The configuration is now an optional
 * parameter so that existing "test_table()" calls still compile; such
 * calls do nothing until they are changed to pass the configuration.
 */
static void
test_table(struct config_rec *conf = 0)
{
    struct magic *m;
    struct magic *prevm = NULL;

    if (!conf)
        return;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        if (isprint((addr >> 24) & 255) &&
            isprint((addr >> 16) & 255) &&
            isprint((addr >> 8) & 255) &&
            isprint(addr & 255)) {
            /* The report had degenerated into a dangling argument list. */
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) ((addr >> 24) & 255)
                          << (char) ((addr >> 16) & 255)
                          << (char) ((addr >> 8) & 255)
                          << (char) (addr & 255)
                          << "\""
#ifdef DEBUG_LINENUMBERS
                          /* lineno only exists when line numbers are kept */
                          << " line=" << (prevm ? prevm->lineno : -1)
#endif
                          << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00610
00611
/* Advances the local pointer "l" past any ASCII whitespace. */
#define EATAB \
    do { \
        while (isascii((unsigned char) *l) && isspace((unsigned char) *l)) \
            ++l; \
    } while (0)
00613
00614
int KMimeMagic::parse_line(
char *line,
int *rule,
int lineno)
00615 {
00616
int ws_offset;
00617
00618
00619
if (line[0]) {
00620 line[strlen(line) - 1] =
'\0';
00621 }
00622
00623 ws_offset = 0;
00624
while (line[ws_offset] && isspace(line[ws_offset])) {
00625 ws_offset++;
00626 }
00627
00628
00629
if (line[ws_offset] == 0) {
00630
return 0;
00631 }
00632
00633
if (line[ws_offset] ==
'#')
00634
return 0;
00635
00636
00637 (*rule)++;
00638
00639
00640
return (parse(line + ws_offset, lineno) != 0);
00641 }
00642
00643
00644
00645
00646
int KMimeMagic::apprentice(
const QString& magicfile )
00647 {
00648 FILE *f;
00649
char line[BUFSIZ + 1];
00650
int errs = 0;
00651
int lineno;
00652
int rule = 0;
00653
QCString fname;
00654
00655
if (magicfile.isEmpty())
00656
return -1;
00657 fname = QFile::encodeName(magicfile);
00658 f = fopen(fname,
"r");
00659
if (f == NULL) {
00660
kdError(7018) <<
"can't read magic file " << fname.data() <<
": " << strerror(errno) <<
endl;
00661
return -1;
00662 }
00663
00664
00665
for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666
if (parse_line(line, &rule, lineno))
00667 errs++;
00668
00669 fclose(f);
00670
00671
#ifdef DEBUG_APPRENTICE
00672
kdDebug(7018) <<
"apprentice: conf=" << conf <<
" file=" << magicfile <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00673
kdDebug(7018) <<
"apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00674
#endif
00675
00676
#ifdef MIME_MAGIC_DEBUG_TABLE
00677
test_table();
00678
#endif
00679
00680
return (errs ? -1 : 0);
00681 }
00682
00683
int KMimeMagic::buff_apprentice(
char *buff)
00684 {
00685
char line[BUFSIZ + 2];
00686
int errs = 0;
00687
int lineno = 1;
00688
char *start = buff;
00689
char *
end;
00690
int count = 0;
00691
int rule = 0;
00692
int len = strlen(buff) + 1;
00693
00694
00695
do {
00696 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697 strncpy(line, start, count);
00698 line[count] =
'\0';
00699
if ((
end = strchr(line,
'\n'))) {
00700 *(++
end) =
'\0';
00701 count = strlen(line);
00702 }
else
00703 strcat(line,
"\n");
00704 start += count;
00705 len -= count;
00706
if (parse_line(line, &rule, lineno))
00707 errs++;
00708 lineno++;
00709 }
while (len > 0);
00710
00711
#ifdef DEBUG_APPRENTICE
00712
kdDebug(7018) <<
"buff_apprentice: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00713
kdDebug(7018) <<
"buff_apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00714
#endif
00715
00716
#ifdef MIME_MAGIC_DEBUG_TABLE
00717
test_table();
00718
#endif
00719
00720
return (errs ? -1 : 0);
00721 }
00722
00723
00724
00725
00726
static unsigned long
00727 signextend(
struct magic *m,
unsigned long v)
00728 {
00729
if (!(m->flag & UNSIGNED))
00730
switch (m->type) {
00731
00732
00733
00734
00735
00736
case BYTE:
00737 v = (
char) v;
00738
break;
00739
case SHORT:
00740
case BESHORT:
00741
case LESHORT:
00742 v = (
short) v;
00743
break;
00744
case DATE:
00745
case BEDATE:
00746
case LEDATE:
00747
case LONG:
00748
case BELONG:
00749
case LELONG:
00750 v = (
long) v;
00751
break;
00752
case STRING:
00753
break;
00754
default:
00755
kdError(7018) <<
"" <<
"signextend" <<
": can't happen: m->type=" << m->type <<
endl;
00756
return 998;
00757 }
00758
return v;
00759 }
00760
00761
00762
00763
00764
int KMimeMagic::parse(
char *l,
int
00765 #ifdef DEBUG_LINENUMBERS
00766 lineno
00767 #endif
00768 )
00769 {
00770
int i = 0;
00771
struct magic *m;
00772
char *t,
00773 *s;
00774
00775
if ((m = (
struct magic *) calloc(1,
sizeof(
struct magic))) == NULL) {
00776
kdError(7018) <<
"parse: Out of memory." <<
endl;
00777
return -1;
00778 }
00779
00780 m->next = NULL;
00781
if (!conf->magic || !conf->last) {
00782 conf->magic = conf->last = m;
00783 }
else {
00784 conf->last->next = m;
00785 conf->last = m;
00786 }
00787
00788
00789 m->flag = 0;
00790 m->cont_level = 0;
00791
#ifdef DEBUG_LINENUMBERS
00792
m->lineno = lineno;
00793
#endif
00794
00795
while (*l ==
'>') {
00796 ++l;
00797 m->cont_level++;
00798 }
00799
00800
if (m->cont_level != 0 && *l ==
'(') {
00801 ++l;
00802 m->flag |= INDIR;
00803 }
00804
00805 m->offset = (
int) strtol(l, &t, 0);
00806
if (l == t) {
00807
kdError(7018) <<
"parse: offset " << l <<
" invalid" <<
endl;
00808 }
00809 l = t;
00810
00811
if (m->flag & INDIR) {
00812 m->in.type = LONG;
00813 m->in.offset = 0;
00814
00815
00816
00817
if (*l ==
'.') {
00818
switch (*++l) {
00819
case 'l':
00820 m->in.type = LONG;
00821
break;
00822
case 's':
00823 m->in.type = SHORT;
00824
break;
00825
case 'b':
00826 m->in.type = BYTE;
00827
break;
00828
default:
00829
kdError(7018) <<
"parse: indirect offset type " << *l <<
" invalid" <<
endl;
00830
break;
00831 }
00832 l++;
00833 }
00834 s = l;
00835
if (*l ==
'+' || *l ==
'-')
00836 l++;
00837
if (isdigit((
unsigned char) *l)) {
00838 m->in.offset = strtol(l, &t, 0);
00839
if (*s ==
'-')
00840 m->in.offset = -m->in.offset;
00841 }
else
00842 t = l;
00843
if (*t++ !=
')') {
00844
kdError(7018) <<
"parse: missing ')' in indirect offset" <<
endl;
00845 }
00846 l = t;
00847 }
00848
while (isascii((
unsigned char) *l) && isdigit((
unsigned char) *l))
00849 ++l;
00850 EATAB;
00851
00852
#define NBYTE 4
00853
#define NSHORT 5
00854
#define NLONG 4
00855
#define NSTRING 6
00856
#define NDATE 4
00857
#define NBESHORT 7
00858
#define NBELONG 6
00859
#define NBEDATE 6
00860
#define NLESHORT 7
00861
#define NLELONG 6
00862
#define NLEDATE 6
00863
00864
if (*l ==
'u') {
00865 ++l;
00866 m->flag |= UNSIGNED;
00867 }
00868
00869
if (strncmp(l,
"byte", NBYTE) == 0) {
00870 m->type = BYTE;
00871 l += NBYTE;
00872 }
else if (strncmp(l,
"short", NSHORT) == 0) {
00873 m->type = SHORT;
00874 l += NSHORT;
00875 }
else if (strncmp(l,
"long", NLONG) == 0) {
00876 m->type = LONG;
00877 l += NLONG;
00878 }
else if (strncmp(l,
"string", NSTRING) == 0) {
00879 m->type = STRING;
00880 l += NSTRING;
00881 }
else if (strncmp(l,
"date", NDATE) == 0) {
00882 m->type = DATE;
00883 l += NDATE;
00884 }
else if (strncmp(l,
"beshort", NBESHORT) == 0) {
00885 m->type = BESHORT;
00886 l += NBESHORT;
00887 }
else if (strncmp(l,
"belong", NBELONG) == 0) {
00888 m->type = BELONG;
00889 l += NBELONG;
00890 }
else if (strncmp(l,
"bedate", NBEDATE) == 0) {
00891 m->type = BEDATE;
00892 l += NBEDATE;
00893 }
else if (strncmp(l,
"leshort", NLESHORT) == 0) {
00894 m->type = LESHORT;
00895 l += NLESHORT;
00896 }
else if (strncmp(l,
"lelong", NLELONG) == 0) {
00897 m->type = LELONG;
00898 l += NLELONG;
00899 }
else if (strncmp(l,
"ledate", NLEDATE) == 0) {
00900 m->type = LEDATE;
00901 l += NLEDATE;
00902 }
else {
00903
kdError(7018) <<
"parse: type " << l <<
" invalid" <<
endl;
00904
return -1;
00905 }
00906
00907
if (*l ==
'&') {
00908 ++l;
00909 m->mask = signextend(m, strtol(l, &l, 0));
00910 }
else
00911 m->mask = (
unsigned long) ~0L;
00912 EATAB;
00913
00914
switch (*l) {
00915
case '>':
00916
case '<':
00917
00918
case '&':
00919
case '^':
00920
case '=':
00921 m->reln = *l;
00922 ++l;
00923
break;
00924
case '!':
00925
if (m->type != STRING) {
00926 m->reln = *l;
00927 ++l;
00928
break;
00929 }
00930
00931
default:
00932
if (*l ==
'x' && isascii((
unsigned char) l[1]) &&
00933 isspace((
unsigned char) l[1])) {
00934 m->reln = *l;
00935 ++l;
00936
goto GetDesc;
00937 }
00938 m->reln =
'=';
00939
break;
00940 }
00941 EATAB;
00942
00943
if (getvalue(m, &l))
00944
return -1;
00945
00946
00947
00948 GetDesc:
00949 EATAB;
00950
if (l[0] ==
'\b') {
00951 ++l;
00952 m->nospflag = 1;
00953 }
else if ((l[0] ==
'\\') && (l[1] ==
'b')) {
00954 ++l;
00955 ++l;
00956 m->nospflag = 1;
00957 }
else
00958 m->nospflag = 0;
00959
00960
while (*l !=
'\0' && *l !=
'#' && i < MAXDESC-1)
00961 m->desc[i++] = *l++;
00962 m->desc[i] =
'\0';
00963
00964
while (--i>0 && isspace( m->desc[i] ))
00965 m->desc[i] =
'\0';
00966
00967
00968
00969
00970
#ifdef DEBUG_APPRENTICE
00971
kdDebug(7018) <<
"parse: line=" << lineno <<
" m=" << m <<
" next=" << m->next <<
" cont=" << m->cont_level <<
" desc=" << (m->desc ? m->desc :
"NULL") <<
endl;
00972
#endif
00973
return 0;
00974 }
00975
00976
00977
00978
00979
00980
00981
static int
00982 getvalue(
struct magic *m,
char **p)
00983 {
00984
int slen;
00985
00986
if (m->type == STRING) {
00987 *p = getstr(*p, m->value.s,
sizeof(m->value.s), &slen);
00988 m->vallen = slen;
00989 }
else if (m->reln !=
'x')
00990 m->value.l = signextend(m, strtol(*p, p, 0));
00991
return 0;
00992 }
00993
00994
00995
00996
00997
00998
00999
static char *
01000 getstr(
register char *s,
register char *p,
int plen,
int *slen)
01001 {
01002
char *origs = s,
01003 *origp = p;
01004
char *pmax = p + plen - 1;
01005
register int c;
01006
register int val;
01007
01008
while ((c = *s++) !=
'\0') {
01009
if (isspace((
unsigned char) c))
01010
break;
01011
if (p >= pmax) {
01012
kdError(7018) <<
"String too long: " << origs <<
endl;
01013
break;
01014 }
01015
if (c ==
'\\') {
01016
switch (c = *s++) {
01017
01018
case '\0':
01019
goto out;
01020
01021
default:
01022 *p++ = (
char) c;
01023
break;
01024
01025
case 'n':
01026 *p++ =
'\n';
01027
break;
01028
01029
case 'r':
01030 *p++ =
'\r';
01031
break;
01032
01033
case 'b':
01034 *p++ =
'\b';
01035
break;
01036
01037
case 't':
01038 *p++ =
'\t';
01039
break;
01040
01041
case 'f':
01042 *p++ =
'\f';
01043
break;
01044
01045
case 'v':
01046 *p++ =
'\v';
01047
break;
01048
01049
01050
case '0':
01051
case '1':
01052
case '2':
01053
case '3':
01054
case '4':
01055
case '5':
01056
case '6':
01057
case '7':
01058 val = c -
'0';
01059 c = *s++;
01060
if (c >=
'0' && c <=
'7') {
01061 val = (val << 3) | (c -
'0');
01062 c = *s++;
01063
if (c >=
'0' && c <=
'7')
01064 val = (val << 3) | (c -
'0');
01065
else
01066 --s;
01067 }
else
01068 --s;
01069 *p++ = (
char) val;
01070
break;
01071
01072
01073
case 'x':
01074 val =
'x';
01075 c = hextoint(*s++);
01076
if (c >= 0) {
01077 val = c;
01078 c = hextoint(*s++);
01079
if (c >= 0) {
01080 val = (val << 4) + c;
01081 c = hextoint(*s++);
01082
if (c >= 0) {
01083 val = (val << 4) + c;
01084 }
else
01085 --s;
01086 }
else
01087 --s;
01088 }
else
01089 --s;
01090 *p++ = (
char) val;
01091
break;
01092 }
01093 }
else
01094 *p++ = (
char) c;
01095 }
01096 out:
01097 *p =
'\0';
01098 *slen = p - origp;
01099
01100
01101
return s;
01102 }
01103
01104
01105
01106
/*
 * Returns the numeric value of the hexadecimal digit @p c (0-9, a-f or
 * A-F), or -1 if @p c is not a hexadecimal digit.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (!isascii(uc))
        return -1;

    if (isdigit(uc))
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');

    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');

    return -1;
}
01119
01120
01121
01122
01123
static int
01124 mconvert(
union VALUETYPE *p,
struct magic *m)
01125 {
01126
switch (m->type) {
01127
case BYTE:
01128
return 1;
01129
case STRING:
01130
01131 p->s[
sizeof(p->s) - 1] =
'\0';
01132
return 1;
01133
#ifndef WORDS_BIGENDIAN
01134
case SHORT:
01135
#endif
01136
case BESHORT:
01137 p->h = (
short) ((p->hs[0] << 8) | (p->hs[1]));
01138
return 1;
01139
#ifndef WORDS_BIGENDIAN
01140
case LONG:
01141
case DATE:
01142
#endif
01143
case BELONG:
01144
case BEDATE:
01145 p->l = (
long)
01146 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147
return 1;
01148
#ifdef WORDS_BIGENDIAN
01149
case SHORT:
01150
#endif
01151
case LESHORT:
01152 p->h = (
short) ((p->hs[1] << 8) | (p->hs[0]));
01153
return 1;
01154
#ifdef WORDS_BIGENDIAN
01155
case LONG:
01156
case DATE:
01157
#endif
01158
case LELONG:
01159
case LEDATE:
01160 p->l = (
long)
01161 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162
return 1;
01163
default:
01164
kdError(7018) <<
"mconvert: invalid type " << m->type <<
endl;
01165
return 0;
01166 }
01167 }
01168
01169
01170
static int
01171 mget(
union VALUETYPE *p,
unsigned char *s,
struct magic *m,
01172
int nbytes)
01173 {
01174
long offset = m->offset;
01175
switch ( m->type )
01176 {
01177
case BYTE:
01178
if ( offset + 1 > nbytes-1 )
01179
return 0;
01180
break;
01181
case SHORT:
01182
case BESHORT:
01183
case LESHORT:
01184
if ( offset + 2 > nbytes-1 )
01185
return 0;
01186
break;
01187
case LONG:
01188
case BELONG:
01189
case LELONG:
01190
case DATE:
01191
case BEDATE:
01192
case LEDATE:
01193
if ( offset + 4 > nbytes-1 )
01194
return 0;
01195
break;
01196
case STRING:
01197
break;
01198 }
01199
01200
01201
01202
01203
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01204 {
01205
int have = nbytes - offset;
01206 memset(p, 0,
sizeof(
union VALUETYPE));
01207
if (have > 0)
01208 memcpy(p, s + offset, have);
01209 }
else
01210 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01211
01212
if (!mconvert(p, m))
01213
return 0;
01214
01215
if (m->flag & INDIR) {
01216
01217
switch (m->in.type) {
01218
case BYTE:
01219 offset = p->b + m->in.offset;
01220
break;
01221
case SHORT:
01222 offset = p->h + m->in.offset;
01223
break;
01224
case LONG:
01225 offset = p->l + m->in.offset;
01226
break;
01227 }
01228
01229
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01230 return 0;
01231
01232 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01233
01234
if (!mconvert(p, m))
01235
return 0;
01236 }
01237
return 1;
01238 }
01239
01240
static int
01241 mcheck(
union VALUETYPE *p,
struct magic *m)
01242 {
01243
register unsigned long l = m->value.l;
01244
register unsigned long v;
01245
int matched;
01246
01247
if ((m->value.s[0] ==
'x') && (m->value.s[1] ==
'\0')) {
01248
kdError(7018) <<
"BOINK" <<
endl;
01249
return 1;
01250 }
01251
switch (m->type) {
01252
case BYTE:
01253 v = p->b;
01254
break;
01255
01256
case SHORT:
01257
case BESHORT:
01258
case LESHORT:
01259 v = p->h;
01260
break;
01261
01262
case LONG:
01263
case BELONG:
01264
case LELONG:
01265
case DATE:
01266
case BEDATE:
01267
case LEDATE:
01268 v = p->l;
01269
break;
01270
01271
case STRING:
01272 l = 0;
01273
01274
01275
01276
01277
01278 v = 0;
01279 {
01280
register unsigned char *a = (
unsigned char *) m->value.s;
01281
register unsigned char *b = (
unsigned char *) p->s;
01282
register int len = m->vallen;
01283 Q_ASSERT(len);
01284
01285
while (--len >= 0)
01286
if ((v = *b++ - *a++) != 0)
01287
break;
01288 }
01289
break;
01290
default:
01291
kdError(7018) <<
"mcheck: invalid type " << m->type <<
endl;
01292
return 0;
01293 }
01294
#if 0
01295
qDebug(
"Before signextend %08x", v);
01296
#endif
01297
v = signextend(m, v) & m->mask;
01298
#if 0
01299
qDebug(
"After signextend %08x", v);
01300
#endif
01301
01302
switch (m->reln) {
01303
case 'x':
01304 matched = 1;
01305
break;
01306
01307
case '!':
01308 matched = v != l;
01309
break;
01310
01311
case '=':
01312 matched = v == l;
01313
break;
01314
01315
case '>':
01316
if (m->flag & UNSIGNED)
01317 matched = v > l;
01318
else
01319 matched = (
long) v > (
long) l;
01320
break;
01321
01322
case '<':
01323
if (m->flag & UNSIGNED)
01324 matched = v < l;
01325
else
01326 matched = (
long) v < (
long) l;
01327
break;
01328
01329
case '&':
01330 matched = (v & l) == l;
01331
break;
01332
01333
case '^':
01334 matched = (v & l) != l;
01335
break;
01336
01337
default:
01338 matched = 0;
01339
kdError(7018) <<
"mcheck: can't happen: invalid relation " << m->reln <<
"." <<
endl;
01340
break;
01341 }
01342
01343
return matched;
01344 }
01345
01346
01347
01348
01349
01350
01351
void process(
struct config_rec* conf,
const QString & fn)
01352 {
01353
int fd = 0;
01354
unsigned char buf[HOWMANY + 1];
01355 KDE_struct_stat sb;
01356
int nbytes = 0;
01357
int tagbytes = 0;
01358
QCString fileName = QFile::encodeName( fn );
01359
01360
01361
01362
01363
if (fsmagic(conf, fileName, &sb) != 0) {
01364
01365
return;
01366 }
01367
if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368
01369
01370
01371
01372
01373
01374 conf->resultBuf = MIME_BINARY_UNREADABLE;
01375
return;
01376 }
01377
01378
01379
01380
if ((nbytes = read(fd, (
char *) buf, HOWMANY)) == -1) {
01381
kdError(7018) <<
"" << fn <<
" read failed (" << strerror(errno) <<
")." <<
endl;
01382 conf->resultBuf = MIME_BINARY_UNREADABLE;
01383
return;
01384 }
01385
if ((tagbytes = tagmagic(buf, nbytes))) {
01386
01387 lseek(fd, tagbytes, SEEK_SET);
01388 nbytes = read(fd, (
char*)buf, HOWMANY);
01389
if (nbytes < 0) {
01390 conf->resultBuf = MIME_BINARY_UNREADABLE;
01391
return;
01392 }
01393 }
01394
if (nbytes == 0) {
01395 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01396 }
else {
01397 buf[nbytes++] =
'\0';
01398 tryit(conf, buf, nbytes);
01399 }
01400
01401
if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01402 {
01403
01404
01405
01406
01407
01408
struct utimbuf utbuf;
01409 utbuf.actime = sb.st_atime;
01410 utbuf.modtime = sb.st_mtime;
01411 (
void) utime(fileName, &utbuf);
01412 }
01413 (
void)
close(fd);
01414 }
01415
01416
01417
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb)
01418 {
01419
01420
if (match(conf, buf, nb))
01421
return;
01422
01423
01424
if (ascmagic(conf, buf, nb) == 1)
01425
return;
01426
01427
01428
if (textmagic(conf, buf, nb))
01429
return;
01430
01431
01432 conf->resultBuf = MIME_BINARY_UNKNOWN;
01433 conf->accuracy = 0;
01434 }
01435
01436
static int
01437 fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb)
01438 {
01439
int ret = 0;
01440
01441
01442
01443
01444
01445 ret = KDE_lstat(fn, sb);
01446
01447
if (ret) {
01448
return 1;
01449
01450 }
01451
01452
01453
01454
01455
01456
01457
switch (sb->st_mode & S_IFMT) {
01458
case S_IFDIR:
01459 conf->resultBuf = MIME_INODE_DIR;
01460
return 1;
01461
case S_IFCHR:
01462 conf->resultBuf = MIME_INODE_CDEV;
01463
return 1;
01464
case S_IFBLK:
01465 conf->resultBuf = MIME_INODE_BDEV;
01466
return 1;
01467
01468
#ifdef S_IFIFO
01469
case S_IFIFO:
01470 conf->resultBuf = MIME_INODE_FIFO;
01471
return 1;
01472
#endif
01473
#ifdef S_IFLNK
01474
case S_IFLNK:
01475 {
01476
char buf[BUFSIZ + BUFSIZ + 4];
01477
register int nch;
01478 KDE_struct_stat tstatbuf;
01479
01480
if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01481 conf->resultBuf = MIME_INODE_LINK;
01482
01483
return 1;
01484 }
01485 buf[nch] =
'\0';
01486
01487
if (*buf ==
'/') {
01488
if (KDE_stat(buf, &tstatbuf) < 0) {
01489 conf->resultBuf = MIME_INODE_LINK;
01490
01491
return 1;
01492 }
01493 }
else {
01494
char *tmp;
01495
char buf2[BUFSIZ + BUFSIZ + 4];
01496
01497 strncpy(buf2, fn, BUFSIZ);
01498 buf2[BUFSIZ] = 0;
01499
01500
if ((tmp = strrchr(buf2,
'/')) == NULL) {
01501 tmp = buf;
01502 }
else {
01503
01504 *++tmp =
'\0';
01505 strcat(buf2, buf);
01506 tmp = buf2;
01507 }
01508
if (KDE_stat(tmp, &tstatbuf) < 0) {
01509 conf->resultBuf = MIME_INODE_LINK;
01510
01511
return 1;
01512 }
else
01513 strcpy(buf, tmp);
01514 }
01515
if (conf->followLinks)
01516 process( conf, QFile::decodeName( buf ) );
01517
else
01518 conf->resultBuf = MIME_INODE_LINK;
01519
return 1;
01520 }
01521
return 1;
01522
#endif
01523
#ifdef S_IFSOCK
01524
#ifndef __COHERENT__
01525
case S_IFSOCK:
01526 conf->resultBuf = MIME_INODE_SOCK;
01527
return 1;
01528
#endif
01529
#endif
01530
case S_IFREG:
01531
break;
01532
default:
01533
kdError(7018) <<
"KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode <<
"." <<
endl;
01534
01535 }
01536
01537
01538
01539
01540
if (sb->st_size == 0) {
01541 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01542
return 1;
01543 }
01544
return 0;
01545 }
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
static int
01574 match(
struct config_rec* conf,
unsigned char *s,
int nbytes)
01575 {
01576
int cont_level = 0;
01577
union VALUETYPE p;
01578
struct magic *m;
01579
01580
#ifdef DEBUG_MIMEMAGIC
01581
kdDebug(7018) <<
"match: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
01582
for (m = conf->magic; m; m = m->next) {
01583
if (isprint((((
unsigned long) m) >> 24) & 255) &&
01584 isprint((((
unsigned long) m) >> 16) & 255) &&
01585 isprint((((
unsigned long) m) >> 8) & 255) &&
01586 isprint(((
unsigned long) m) & 255)) {
01587
kdDebug(7018) <<
"match: POINTER CLOBBERED! " <<
endl;
01588
break;
01589 }
01590 }
01591
#endif
01592
01593
for (m = conf->magic; m; m = m->next) {
01594
#ifdef DEBUG_MIMEMAGIC
01595
kdDebug(7018) <<
"match: line=" << m->lineno <<
" desc=" << m->desc <<
endl;
01596
#endif
01597
memset(&p, 0,
sizeof(
union VALUETYPE));
01598
01599
01600
if (!mget(&p, s, m, nbytes) ||
01601 !mcheck(&p, m)) {
01602
struct magic *m_cont;
01603
01604
01605
01606
01607
if (!m->next || (m->next->cont_level == 0)) {
01608
continue;
01609 }
01610 m_cont = m->next;
01611
while (m_cont && (m_cont->cont_level != 0)) {
01612
#ifdef DEBUG_MIMEMAGIC
01613
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m_cont->cont_level <<
" mc=" << m_cont->lineno <<
" mc->next=" << m_cont <<
" " <<
endl;
01614
#endif
01615
01616
01617
01618
01619 m = m_cont;
01620 m_cont = m_cont->next;
01621 }
01622
continue;
01623 }
01624
01625
01626
#ifdef DEBUG_MIMEMAGIC
01627
kdDebug(7018) <<
"match: rule matched, line=" << m->lineno <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01628
#endif
01629
01630
01631 conf->resultBuf = m->desc;
01632
01633 cont_level++;
01634
01635
01636
01637
01638 m = m->next;
01639
while (m && (m->cont_level != 0)) {
01640
#ifdef DEBUG_MIMEMAGIC
01641
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m->cont_level <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01642
#endif
01643
if (cont_level >= m->cont_level) {
01644
if (cont_level > m->cont_level) {
01645
01646
01647
01648
01649 cont_level = m->cont_level;
01650 }
01651
if (mget(&p, s, m, nbytes) &&
01652 mcheck(&p, m)) {
01653
01654
01655
01656
01657
01658
01659
#ifdef DEBUG_MIMEMAGIC
01660
kdDebug(7018) <<
"continuation matched" <<
endl;
01661
#endif
01662
conf->resultBuf = m->desc;
01663 cont_level++;
01664 }
01665 }
01666
01667 m = m->next;
01668 }
01669
01670
01671
if ( !conf->resultBuf.isEmpty() )
01672 {
01673
#ifdef DEBUG_MIMEMAGIC
01674
kdDebug(7018) <<
"match: matched" <<
endl;
01675
#endif
01676
return 1;
01677 }
01678 }
01679
#ifdef DEBUG_MIMEMAGIC
01680
kdDebug(7018) <<
"match: failed" <<
endl;
01681
#endif
01682
return 0;
01683 }
01684
01685
01686
01687
/*
 * tagmagic - detect a leading "ID3" tag block and compute its size.
 *
 * buf     start of the data
 * nbytes  number of bytes available in buf
 *
 * Returns the number of bytes occupied by the tag (10 header bytes, 10 more
 * when flag bit 0x10 is set, plus the size stored in bytes 6..9 as four
 * 7-bit groups), or 0 when fewer than 40 bytes are available, the buffer
 * does not start with "ID3", byte 3 is greater than 4, or any of the low
 * four flag bits in byte 5 is set.
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;

    const bool hasSignature = (buf[0] == 'I') && (buf[1] == 'D') && (buf[2] == '3');
    if (!hasSignature)
        return 0;

    if (buf[3] > 4)
        return 0;

    const unsigned char flags = buf[5];
    if (flags & 0x0F)
        return 0;

    // Fold the four size bytes together, most significant first,
    // seven bits per byte.
    int stored = 0;
    for (int k = 6; k <= 9; k++)
        stored = (stored << 7) + buf[k];

    int total = 10;
    if (flags & 0x10)
        total += 10;
    return total + stored;
}
01706
01707
/* A token: a run of `length` characters starting at `data`.
 * The run is not NUL-terminated. */
struct Token {
    char *data;
    int length;
};

/*
 * Splits a character buffer into tokens separated by NUL, tab, space,
 * carriage return, form feed, newline, ',', ';' or '>'.
 *
 * The buffer is neither copied nor modified; the returned tokens point
 * into it.
 */
struct Tokenizer
{
    /* buf: the characters to split; nbytes: how many of them to look at.
     * `newline` is now initialised (the start of the buffer counts as the
     * start of a line), so isNewLine() is well defined even before the
     * first call to nextToken(). */
    Tokenizer(char* buf, int nbytes)
        : data(buf), length(nbytes), pos(0), newline(true)
    {
        token.data = buf;
        token.length = 0;
    }

    /* True when the token returned by the last nextToken() call was the
     * first one of the buffer or was preceded by a '\n' among the
     * separators skipped in that call. */
    bool isNewLine() {
        return newline;
    }

    /* Returns the next token. The returned object is owned by the
     * tokenizer and overwritten by the following call. A token of length 0
     * means the buffer is exhausted. */
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data + pos;
        token.length = 0;
        while (pos < length) {
            switch (data[pos]) {
            case '\n':
                newline = true;
                // fall through: a newline is a separator as well
            case '\0':
            case '\t':
            case ' ':
            case '\r':
            case '\f':
            case ',':
            case ';':
            case '>':
                if (token.length == 0)
                    token.data++;      // leading separator: skip it
                else
                    return &token;     // separator ends the token; it is
                                       // consumed by the next call
                break;
            default:
                token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;
    char* data;
    int length;
    int pos;
    bool newline;
};
01760
01761
01762
01763
01764
static inline bool STREQ(
const Token *token,
const char *b) {
01765
const char *a = token->data;
01766
int len = token->length;
01767
if (a == b)
return true;
01768
while(*a && *b && len > 0) {
01769
if (*a != *b)
return false;
01770 a++; b++; len--;
01771 }
01772
return (len == 0 && *b == 0);
01773 }
01774
01775
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes)
01776 {
01777
int i;
01778
double pct, maxpct, pctsum;
01779
double pcts[NTYPES];
01780
int mostaccurate, tokencount;
01781
int typeset, jonly, conly, jconly, objconly, cpponly;
01782
int has_escapes = 0;
01783
01784
01785
01786
01787 conf->accuracy = 70;
01788
01789
01790
01791
01792
01793
01794
if (*buf ==
'.') {
01795
unsigned char *tp = buf + 1;
01796
01797
while (isascii(*tp) && isspace(*tp))
01798 ++tp;
01799
if ((isascii(*tp) && (isalnum(*tp) || *tp ==
'\\') &&
01800 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp ==
'"'))) {
01801 conf->resultBuf = MIME_APPL_TROFF;
01802
return 1;
01803 }
01804 }
01805
if ((*buf ==
'c' || *buf ==
'C') &&
01806 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01807
01808 conf->resultBuf = MIME_TEXT_FORTRAN;
01809
return 1;
01810 }
01811 assert(nbytes-1 < HOWMANY + 1);
01812
01813 has_escapes = (memchr(buf,
'\033', nbytes) != NULL);
01814 Tokenizer tokenizer((
char*)buf, nbytes);
01815
const Token* token;
01816
bool linecomment =
false, blockcomment =
false;
01817
const struct names *p;
01818
int typecount[NTYPES];
01819
01820
01821
01822
01823 memset(&typecount, 0,
sizeof(typecount));
01824 typeset = 0;
01825 jonly = 0;
01826 conly = 0;
01827 jconly = 0;
01828 objconly = 0;
01829 cpponly = 0;
01830 tokencount = 0;
01831
bool foundClass =
false;
01832
01833
01834
while ((token = tokenizer.nextToken())->length > 0) {
01835
#ifdef DEBUG_MIMEMAGIC
01836
kdDebug(7018) <<
"KMimeMagic::ascmagic token=" << token <<
endl;
01837
#endif
01838
if (linecomment && tokenizer.isNewLine())
01839 linecomment =
false;
01840
if (blockcomment && STREQ(token,
"*/")) {
01841 blockcomment =
false;
01842
continue;
01843 }
01844
for (p = names; p->name ; p++) {
01845
if (STREQ(token, p->name)) {
01846
#ifdef DEBUG_MIMEMAGIC
01847
kdDebug(7018) <<
"KMimeMagic::ascmagic token matches ! name=" << p->name <<
" type=" << p->type <<
endl;
01848
#endif
01849
tokencount++;
01850 typeset |= p->type;
01851
if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01852
if (linecomment || blockcomment) {
01853
continue;
01854 }
01855
else {
01856
switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01857 {
01858
case L_JAVA:
01859 jonly++;
01860
break;
01861
case L_OBJC:
01862 objconly++;
01863
break;
01864
case L_CPP:
01865 cpponly++;
01866
break;
01867
case (L_CPP|L_JAVA):
01868 jconly++;
01869
if ( !foundClass && STREQ(token,
"class") )
01870 foundClass =
true;
01871
break;
01872
case (L_C|L_CPP):
01873 conly++;
01874
break;
01875
default:
01876
if (STREQ(token,
"//")) linecomment =
true;
01877
if (STREQ(token,
"/*")) blockcomment =
true;
01878 }
01879 }
01880 }
01881
for (i = 0; i < (
int)NTYPES; i++) {
01882
if ((1 << i) & p->type) typecount[i]++;
01883 }
01884 }
01885 }
01886 }
01887
01888
if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01889 conf->accuracy = 60;
01890
if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01891
#ifdef DEBUG_MIMEMAGIC
01892
kdDebug(7018) <<
"C/C++/Java/ObjC: jonly=" << jonly <<
" conly=" << conly <<
" jconly=" << jconly <<
" objconly=" << objconly <<
endl;
01893
#endif
01894
if (jonly > 1 && foundClass) {
01895
01896 conf->resultBuf =
QString(types[P_JAVA].type);
01897
return 1;
01898 }
01899
if (jconly > 1) {
01900
01901
if (typecount[P_JAVA] < typecount[P_CPP])
01902 conf->resultBuf =
QString(types[P_CPP].type);
01903
else
01904 conf->resultBuf =
QString(types[P_JAVA].type);
01905
return 1;
01906 }
01907
if (conly + cpponly > 1) {
01908
01909
if (cpponly > 0)
01910 conf->resultBuf =
QString(types[P_CPP].type);
01911
else
01912 conf->resultBuf =
QString(types[P_C].type);
01913
return 1;
01914 }
01915
if (objconly > 0) {
01916 conf->resultBuf =
QString(types[P_OBJC].type);
01917
return 1;
01918 }
01919 }
01920 }
01921
01922
01923
01924
01925
01926 mostaccurate = -1;
01927 maxpct = pctsum = 0.0;
01928
for (i = 0; i < (
int)NTYPES; i++) {
01929
if (typecount[i] > 1) {
01930 pct = (
double)typecount[i] / (
double)types[i].kwords *
01931 (
double)types[i].weight;
01932 pcts[i] = pct;
01933 pctsum += pct;
01934
if (pct > maxpct) {
01935 maxpct = pct;
01936 mostaccurate = i;
01937 }
01938
#ifdef DEBUG_MIMEMAGIC
01939
kdDebug(7018) <<
"" << types[i].type <<
" has " << typecount[i] <<
" hits, " << types[i].kwords <<
" kw, weight " << types[i].weight <<
", " << pct <<
" -> max = " << maxpct <<
"\n" <<
endl;
01940
#endif
01941
}
01942 }
01943
if (mostaccurate >= 0) {
01944
if ( mostaccurate != P_JAVA || foundClass )
01945 {
01946 conf->accuracy = (
int)(pcts[mostaccurate] / pctsum * 60);
01947
#ifdef DEBUG_MIMEMAGIC
01948
kdDebug(7018) <<
"mostaccurate=" << mostaccurate <<
" pcts=" << pcts[mostaccurate] <<
" pctsum=" << pctsum <<
" accuracy=" << conf->accuracy <<
endl;
01949
#endif
01950
conf->resultBuf =
QString(types[mostaccurate].type);
01951
return 1;
01952 }
01953 }
01954
01955
switch (is_tar(buf, nbytes)) {
01956
case 1:
01957
01958 conf->resultBuf = MIME_APPL_TAR;
01959 conf->accuracy = 90;
01960
return 1;
01961
case 2:
01962
01963 conf->resultBuf = MIME_APPL_TAR;
01964 conf->accuracy = 90;
01965
return 1;
01966 }
01967
01968
for (i = 0; i < nbytes; i++) {
01969
if (!isascii(*(buf + i)))
01970
return 0;
01971 }
01972
01973
01974 conf->accuracy = 90;
01975
if (has_escapes) {
01976
01977
01978 conf->resultBuf = MIME_TEXT_UNKNOWN;
01979 }
else {
01980
01981 conf->resultBuf = MIME_TEXT_PLAIN;
01982 }
01983
return 1;
01984 }
01985
01986
01987
/* Line length limit used by textmagic(): a buffer containing a line longer
 * than this many bytes is not reported as text. */
#define TEXT_MAXLINELEN 300
01988
01989
01990
01991
01992
01993
static int textmagic(
struct config_rec* conf,
unsigned char * buf,
int nbytes)
01994 {
01995
int i;
01996
unsigned char *cp;
01997
01998 nbytes--;
01999
02000
02001
for (i = 0, cp = buf; i < nbytes; i++, cp++)
02002
if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02003
return 0;
02004
02005
02006
02007
02008
for (i = 0; i < nbytes;) {
02009 cp = (
unsigned char *) memchr(buf,
'\n', nbytes - i);
02010
if (cp == NULL) {
02011
02012
if (i + TEXT_MAXLINELEN >= nbytes)
02013
break;
02014
else
02015
return 0;
02016 }
02017
if (cp - buf > TEXT_MAXLINELEN)
02018
return 0;
02019 i += (cp - buf + 1);
02020 buf = cp + 1;
02021 }
02022 conf->resultBuf = MIME_TEXT_PLAIN;
02023
return 1;
02024 }
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
/* True when c is one of the characters '0' through '7'.
 * Note: the argument is evaluated twice. */
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
02041
02042
02043
02044
02045
02046
02047
static int
02048 is_tar(
unsigned char *buf,
int nbytes)
02049 {
02050
register union record *header = (
union record *) buf;
02051
register int i;
02052
register long sum,
02053 recsum;
02054
register char *p;
02055
02056
if (nbytes < (
int)
sizeof(
union record))
02057 return 0;
02058
02059 recsum = from_oct(8, header->header.chksum);
02060
02061 sum = 0;
02062 p = header->charptr;
02063
for (i =
sizeof(
union record); --i >= 0;) {
02064
02065
02066
02067
02068 sum += 0xFF & *p++;
02069 }
02070
02071
02072
for (i =
sizeof(header->header.chksum); --i >= 0;)
02073 sum -= 0xFF & header->header.chksum[i];
02074 sum +=
' ' *
sizeof header->header.chksum;
02075
02076
if (sum != recsum)
02077
return 0;
02078
02079
if (0 == strcmp(header->header.magic, TMAGIC))
02080
return 2;
02081
02082
return 1;
02083 }
02084
02085
02086
02087
02088
02089
02090
02091
/*
 * from_oct - parse an octal number from a fixed-width text field.
 *
 * digs   width of the field in characters
 * where  start of the field
 *
 * Leading white space is skipped, then up to the remaining width of octal
 * digits is read. Returns the value, or -1 when the field consists of white
 * space only or when the digits are followed, inside the field, by a
 * character that is neither NUL nor white space.
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(): passing a negative plain char is undefined behaviour.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    // Skip leading white space; it counts against the field width.
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;          // nothing but white space
    }

    // Accumulate octal digits, three bits each.
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    // Inside the field the number must end with NUL or white space.
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02112
02113 KMimeMagic::KMimeMagic()
02114 {
02115
02116
QString mimefile =
locate(
"mime",
"magic" );
02117 init( mimefile );
02118
02119
QStringList snippets =
KGlobal::dirs()->
findAllResources(
"config",
"magic/*.magic",
true );
02120
for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02121
if ( !
mergeConfig( *it ) )
02122
kdWarning() <<
k_funcinfo <<
"Failed to parse " << *it <<
endl;
02123 }
02124
02125 KMimeMagic::KMimeMagic(
const QString & _configfile)
02126 {
02127 init( _configfile );
02128 }
02129
02130
void KMimeMagic::init(
const QString& _configfile )
02131 {
02132
int result;
02133 conf =
new config_rec;
02134
02135
02136 conf->magic = conf->last = NULL;
02137 magicResult = NULL;
02138 conf->followLinks =
false;
02139
02140 conf->utimeConf = 0L;
02141
02142 result = apprentice(_configfile);
02143
if (result == -1)
02144
return;
02145
#ifdef MIME_MAGIC_DEBUG_TABLE
02146
test_table();
02147
#endif
02148
}
02149
02150
02151
02152
02153
02154 KMimeMagic::~KMimeMagic()
02155 {
02156
if (conf) {
02157
struct magic *p = conf->magic;
02158
struct magic *q;
02159
while (p) {
02160 q = p;
02161 p = p->next;
02162 free(q);
02163 }
02164
delete conf->utimeConf;
02165
delete conf;
02166 }
02167
delete magicResult;
02168 }
02169
02170
bool
02171 KMimeMagic::mergeConfig(
const QString & _configfile)
02172 {
02173
kdDebug(7018) <<
k_funcinfo << _configfile <<
endl;
02174
int result;
02175
02176
if (_configfile.isEmpty())
02177
return false;
02178 result = apprentice(_configfile);
02179
if (result == -1) {
02180
return false;
02181 }
02182
#ifdef MIME_MAGIC_DEBUG_TABLE
02183
test_table();
02184
#endif
02185
return true;
02186 }
02187
02188
bool
02189 KMimeMagic::mergeBufConfig(
char * _configbuf)
02190 {
02191
int result;
02192
02193
if (conf) {
02194 result = buff_apprentice(_configbuf);
02195
if (result == -1)
02196
return false;
02197
#ifdef MIME_MAGIC_DEBUG_TABLE
02198
test_table();
02199
#endif
02200
return true;
02201 }
02202
return false;
02203 }
02204
02205
void
02206 KMimeMagic::setFollowLinks(
bool _enable )
02207 {
02208 conf->followLinks = _enable;
02209 }
02210
02211
/*
 * Determines the type of the data held in `array`.
 *
 * At most HOWMANY bytes are examined. An empty array yields
 * MIME_BINARY_ZEROSIZE. The returned object belongs to this KMimeMagic
 * instance and is reused by subsequent lookups.
 *
 * Fix over the previous version: nothing is copied from an empty array, so
 * memcpy() is never called with the (possibly null) data pointer of an
 * empty array.
 */
KMimeMagicResult *
KMimeMagic::findBufferType(const QByteArray &array)
{
    // One spare byte for the terminating NUL appended below.
    unsigned char buf[HOWMANY + 1];

    conf->resultBuf = QString::null;
    if ( !magicResult )
        magicResult = new KMimeMagicResult();
    magicResult->setInvalid();
    conf->accuracy = 100;

    int nbytes = array.size();
    if (nbytes > HOWMANY)
        nbytes = HOWMANY;

    if (nbytes == 0) {
        conf->resultBuf = MIME_BINARY_ZEROSIZE;
    }
    else {
        memcpy(buf, array.data(), nbytes);
        // The terminator is counted in the length handed to tryit().
        buf[nbytes++] = '\0';
        tryit(conf, buf, nbytes);
    }

    // Publish the outcome through the cached result object.
    magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
    magicResult->setAccuracy(conf->accuracy);
    return magicResult;
}
02238
02239
static void
02240 refineResult(
KMimeMagicResult *r,
const QString & _filename)
02241 {
02242
QString tmp = r->
mimeType();
02243
if (tmp.isEmpty())
02244
return;
02245
if ( tmp ==
"text/x-c" || tmp ==
"text/x-objc" )
02246 {
02247
if ( _filename.right(2) ==
".h" )
02248 tmp +=
"hdr";
02249
else
02250 tmp +=
"src";
02251 r->
setMimeType(tmp);
02252 }
02253
else
02254
if ( tmp ==
"text/x-c++" )
02255 {
02256
if ( _filename.endsWith(
".h")
02257 || _filename.endsWith(
".hh")
02258 || _filename.endsWith(
".H")
02259 || !_filename.right(4).contains(
'.'))
02260 tmp +=
"hdr";
02261
else
02262 tmp +=
"src";
02263 r->
setMimeType(tmp);
02264 }
02265 }
02266
02267
/*
 * Determines the type of `data` with findBufferType() and then refines the
 * result with the file name `fn` (see refineResult()). Returns the object
 * handed out by findBufferType().
 */
KMimeMagicResult *
KMimeMagic::findBufferFileType(const QByteArray &data,
                               const QString &fn)
{
    KMimeMagicResult * const result = findBufferType( data );
    refineResult(result, fn);
    return result;
}
02275
02276
02277
02278
02279 KMimeMagicResult*
KMimeMagic::findFileType(
const QString & fn)
02280 {
02281
#ifdef DEBUG_MIMEMAGIC
02282
kdDebug(7018) <<
"KMimeMagic::findFileType " << fn <<
endl;
02283
#endif
02284
conf->resultBuf = QString::null;
02285
02286
if ( !
magicResult )
02287
magicResult =
new KMimeMagicResult();
02288
magicResult->
setInvalid();
02289 conf->accuracy = 100;
02290
02291
if ( !conf->utimeConf )
02292 conf->utimeConf =
new KMimeMagicUtimeConf();
02293
02294
02295 process(conf, fn );
02296
02297
02298
02299
magicResult->
setMimeType(conf->resultBuf.stripWhiteSpace());
02300
magicResult->
setAccuracy(conf->accuracy);
02301 refineResult(
magicResult, fn);
02302
return magicResult;
02303 }