LibreOffice Module extensions (master) 1
OOoSpotlightImporter.m
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
#import <zlib.h>

#import "OOoMetaDataParser.h"
#import "OOoContentDataParser.h"
25
/* Lookup table from a document UTI string to the kind name stored under
   kMDItemKind; filled in by +initialize and read by importDocument. */
static NSDictionary *uti2kind = nil;
28
/* In-memory copy of a ZIP local file header.
   The scalar members are listed in the order readLocalFileHeader() reads
   them from the file; filename and extra_field hold the two variable-length
   parts that follow, decoded as UTF-8 (nil when decoding fails). */
typedef struct {
    unsigned short min_version;
    unsigned short general_flag;
    unsigned short compression;
    unsigned short lastmod_time;
    unsigned short lastmod_date;
    unsigned crc32;
    unsigned compressed_size;
    unsigned uncompressed_size;
    unsigned short filename_size;
    unsigned short extra_field_size;
    NSString *filename;
    NSString *extra_field;
} LocalFileHeader;
43
/* In-memory copy of one ZIP central directory entry.
   The scalar members are listed in the order readCentralDirectoryEntry()
   reads them; offset is the file position of the matching local file header.
   filename, extra_field and file_comment hold the variable-length parts,
   decoded as UTF-8 (nil when decoding fails). */
typedef struct {
    unsigned short creator_version;
    unsigned short min_version;
    unsigned short general_flag;
    unsigned short compression;
    unsigned short lastmod_time;
    unsigned short lastmod_date;
    unsigned crc32;
    unsigned compressed_size;
    unsigned uncompressed_size;
    unsigned short filename_size;
    unsigned short extra_field_size;
    unsigned short file_comment_size;
    unsigned short disk_num;
    unsigned short internal_attr;
    unsigned external_attr;
    unsigned offset;
    NSString *filename;
    NSString *extra_field;
    NSString *file_comment;
} CentralDirectoryEntry;
65
/* In-memory copy of the ZIP "end of central directory" record.
   The scalar members are listed in the order readCentralDirectoryEnd() reads
   them; cdir_offset and cdir_size locate the central directory in the file.
   comment holds the trailing archive comment decoded as UTF-8 (nil when
   decoding fails). */
typedef struct {
    unsigned short disk_num;
    unsigned short cdir_disk;
    unsigned short disk_entries;
    unsigned short cdir_entries;
    unsigned cdir_size;
    unsigned cdir_offset;
    unsigned short comment_size;
    NSString *comment;
} CentralDirectoryEnd;
76
/* Record signatures as returned by readInt(), i.e. assembled little-endian
   from the four bytes found in the file. */
#define CDIR_ENTRY_SIG      (0x02014B50)  /* central directory entry   */
#define LOC_FILE_HEADER_SIG (0x04034B50)  /* local file header         */
#define CDIR_END_SIG        (0x06054B50)  /* end of central directory  */
80
/* Reads a single byte from the handle's current position and advances it.
   Returns 0 when file is nil or when no byte could be read (for example at
   end of file), so callers cannot tell a failed read from a genuine zero
   byte. */
static unsigned char readByte(NSFileHandle *file)
{
    if (file == nil)
        return 0;
    NSData* tmpBuf = [file readDataOfLength: 1];
    // At end of file the handle hands back an empty NSData rather than nil;
    // check the length explicitly instead of relying on -bytes being NULL.
    if (tmpBuf == nil || [tmpBuf length] < 1)
        return 0;
    const unsigned char *d = (const unsigned char*)[tmpBuf bytes];
    if (d == NULL)
        return 0;
    return *d;
}
93
/* Reads two consecutive bytes via readByte() and combines them into a
   16-bit value, first byte least significant (little-endian). */
static unsigned short readShort(NSFileHandle *file)
{
    // Two separate statements keep the read order well defined.
    unsigned short low = readByte(file);
    unsigned short high = readByte(file);
    return (unsigned short)((high << 8) | low);
}
100
/* Reads four consecutive bytes via readByte() and combines them into a
   32-bit value, first byte least significant (little-endian). */
static unsigned readInt(NSFileHandle *file)
{
    unsigned value = 0;
    for (unsigned shift = 0; shift < 32; shift += 8)
    {
        unsigned byte = (unsigned)readByte(file);
        value |= byte << shift;
    }
    return value;
}
109
/* Reads an "end of central directory" record at the current file position.
   Returns false, leaving *end untouched, when the next four bytes are not
   CDIR_END_SIG; otherwise fills in every member of *end and returns true.

   end->comment is autoreleased: none of the callers in this file release it,
   so handing out an owned (+1) string leaked it on every call. A caller that
   needs the comment beyond the current autorelease pool must retain it. */
static bool readCentralDirectoryEnd(NSFileHandle *file, CentralDirectoryEnd *end)
{
    unsigned signature = readInt(file);
    if (signature != CDIR_END_SIG)
        return false;

    end->disk_num = readShort(file);
    end->cdir_disk = readShort(file);
    end->disk_entries = readShort(file);
    end->cdir_entries = readShort(file);
    end->cdir_size = readInt(file);
    end->cdir_offset = readInt(file);
    end->comment_size = readShort(file);
    NSData *data = [file readDataOfLength: end->comment_size];
    // nil when the bytes are not valid UTF-8; autorelease on nil is a no-op.
    end->comment = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    return true;
}
127
/* Reads one central directory entry at the current file position.
   Returns false, leaving *entry untouched, when the next four bytes are not
   CDIR_ENTRY_SIG; otherwise fills in every member of *entry, leaves the file
   position just past the entry's comment, and returns true.

   The three strings are autoreleased: no caller in this file releases them,
   and findDataStream() calls this once per directory entry, so owned (+1)
   strings leaked three objects per entry scanned. A caller that needs a
   string beyond the current autorelease pool must retain it. */
static bool readCentralDirectoryEntry(NSFileHandle *file, CentralDirectoryEntry *entry)
{
    unsigned signature = readInt(file);
    if (signature != CDIR_ENTRY_SIG)
        return false;

    entry->creator_version = readShort(file);
    entry->min_version = readShort(file);
    entry->general_flag = readShort(file);
    entry->compression = readShort(file);
    entry->lastmod_time = readShort(file);
    entry->lastmod_date = readShort(file);
    entry->crc32 = readInt(file);
    entry->compressed_size = readInt(file);
    entry->uncompressed_size = readInt(file);
    entry->filename_size = readShort(file);
    entry->extra_field_size = readShort(file);
    entry->file_comment_size = readShort(file);
    entry->disk_num = readShort(file);
    entry->internal_attr = readShort(file);
    entry->external_attr = readInt(file);
    entry->offset = readInt(file);

    // Each string is nil when its bytes are not valid UTF-8; sending
    // autorelease to nil is a no-op.
    NSData *data = [file readDataOfLength: entry->filename_size];
    entry->filename = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    data = [file readDataOfLength: entry->extra_field_size];
    entry->extra_field = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    data = [file readDataOfLength: entry->file_comment_size];
    entry->file_comment = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    return true;
}
158
/* Reads a local file header at the current file position.
   Returns false, leaving *header untouched, when the next four bytes are not
   LOC_FILE_HEADER_SIG; otherwise fills in every member of *header and returns
   true with the file position just past the extra field, i.e. at the first
   byte of the entry's data.

   filename and extra_field are autoreleased: no caller in this file releases
   them, so owned (+1) strings leaked on every call. A caller that needs a
   string beyond the current autorelease pool must retain it. */
static bool readLocalFileHeader(NSFileHandle *file, LocalFileHeader *header)
{
    unsigned signature = readInt(file);
    if (signature != LOC_FILE_HEADER_SIG)
        return false;

    header->min_version = readShort(file);
    header->general_flag = readShort(file);
    header->compression = readShort(file);
    header->lastmod_time = readShort(file);
    header->lastmod_date = readShort(file);
    header->crc32 = readInt(file);
    header->compressed_size = readInt(file);
    header->uncompressed_size = readInt(file);
    header->filename_size = readShort(file);
    header->extra_field_size = readShort(file);

    // Each string is nil when its bytes are not valid UTF-8; sending
    // autorelease to nil is a no-op.
    NSData *data = [file readDataOfLength: header->filename_size];
    header->filename = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    data = [file readDataOfLength: header->extra_field_size];
    header->extra_field = [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
    return true;
}
181
/* Cross-checks a local file header against its central directory entry.
   The minimum version, general flag and compression method must always
   agree. The CRC and the two sizes are compared as well, unless bit 0x08 of
   the local header's general flag is set, in which case they are skipped. */
static bool areHeadersConsistent(const LocalFileHeader *header, const CentralDirectoryEntry *entry)
{
    bool sameBasics = header->min_version == entry->min_version
                   && header->general_flag == entry->general_flag
                   && header->compression == entry->compression;
    if (!sameBasics)
        return false;

    // With flag bit 0x08 set the CRC/size fields are not compared.
    if (header->general_flag & 0x08)
        return true;

    return header->crc32 == entry->crc32
        && header->compressed_size == entry->compressed_size
        && header->uncompressed_size == entry->uncompressed_size;
}
201
/* Searches backward from the end of the file for CDIR_END_SIG.
   Only start positions above (fileLength - 1024), or above 0 for shorter
   files, are examined, on the assumption that the record lies within the
   last 1024 bytes. Files shorter than 10 bytes are rejected outright.
   Returns true with the file positioned on the first byte of the signature,
   or false when no signature was found. */
static bool findCentralDirectoryEnd(NSFileHandle *file)
{
    [file seekToEndOfFile];
    unsigned long long fileLength = [file offsetInFile];
    if (fileLength < 10)
        return false;

    // Lowest position (exclusive) at which a signature is still looked for.
    unsigned long long limit = (fileLength > 1024) ? fileLength - 1024 : 0;

    // Start with the last four bytes and slide the window back one byte
    // at a time.
    unsigned long long pos = fileLength - 4;
    [file seekToFileOffset: pos];
    while (pos > limit)
    {
        if (readInt(file) == CDIR_END_SIG)
        {
            // readInt() moved past the signature; go back to its start.
            [file seekToFileOffset: pos];
            return true;
        }
        pos -= 1;
        [file seekToFileOffset: pos];
    }
    return false;
}
239
/* Decides whether the file looks like a ZIP archive.
   Locates and reads the end-of-central-directory record, reads the first
   central directory entry it points to, reads the local file header that
   entry points to, and finally requires the two headers to be consistent.
   Returns false as soon as any of these steps fails. The file position is
   left wherever the last read ended. */
static bool isZipFile(NSFileHandle *file)
{
    if (!findCentralDirectoryEnd(file))
        return false;

    CentralDirectoryEnd end;
    if (!readCentralDirectoryEnd(file, &end))
        return false;

    [file seekToFileOffset: end.cdir_offset];
    CentralDirectoryEntry entry;
    if (!readCentralDirectoryEntry(file, &entry))
        return false;

    [file seekToFileOffset: entry.offset];
    LocalFileHeader header;
    if (!readLocalFileHeader(file, &header))
        return false;

    if (!areHeadersConsistent(&header, &entry))
        return false;
    return true;
}
259
/* Looks up the archive member called name and positions the file on its data.
   Walks the central directory entry by entry, reading each into *entry, until
   one whose filename equals name is found, the end of the directory (or of
   the file) is reached, or an entry fails to parse. For a match, the member's
   local file header is read and checked against *entry.
   Returns true with *entry describing the member and the file positioned at
   the first byte of its data; returns false otherwise. */
static bool findDataStream(NSFileHandle *file, CentralDirectoryEntry *entry, NSString *name)
{
    [file seekToEndOfFile];
    unsigned long long fileLength = [file offsetInFile];
    if (!findCentralDirectoryEnd(file))
        return false;

    CentralDirectoryEnd end;
    if (!readCentralDirectoryEnd(file, &end))
        return false;

    // Computed in 64 bits so that a bogus offset/size pair cannot wrap around.
    unsigned long long cdirLimit = (unsigned long long)end.cdir_offset + end.cdir_size;

    [file seekToFileOffset: end.cdir_offset];
    bool found = false;
    do
    {
        if (!readCentralDirectoryEntry(file, entry))
            return false;
        // filename is nil when the stored name is not valid UTF-8. Messaging
        // nil yields 0, which equals NSOrderedSame, so without the nil check
        // such an entry would be taken for a match of any name.
        found = (entry->filename != nil
                 && [entry->filename compare: name] == NSOrderedSame);
    }
    while (!found && [file offsetInFile] < fileLength && [file offsetInFile] < cdirLimit);
    if (!found)
        return false;

    [file seekToFileOffset: entry->offset];
    LocalFileHeader header;
    if (!readLocalFileHeader(file, &header))
        return false;
    if (!areHeadersConsistent(&header, entry))
        return false;
    return true;
}
288
/* Returns the contents of the archive member called name, or nil.
   A member whose compression method is 0 is returned as read from the file.
   Any other method is treated as a raw deflate stream and inflated in one
   step into a buffer of the size recorded in the central directory.
   nil is returned when the member cannot be found, memory cannot be
   allocated, or the stream does not inflate completely. */
static NSData *getUncompressedData(NSFileHandle *file, NSString *name)
{
    CentralDirectoryEntry entry;
    if (!findDataStream(file, &entry, name))
        return nil;
    if (!entry.compression)
        return [file readDataOfLength: entry.compressed_size];

    z_stream strm;

    /* allocate inflate state */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    // Negative window bits select a raw deflate stream (no zlib header).
    if (inflateInit2(&strm, -MAX_WBITS) != Z_OK)
        return nil;

    NSData *compressedData = [file readDataOfLength: entry.compressed_size];

    strm.avail_in = (uInt)[compressedData length];
    strm.next_in = (Bytef *)[compressedData bytes];

    // Ask for at least one byte: malloc(0) may legitimately return NULL,
    // which must not be mistaken for an allocation failure.
    size_t capacity = entry.uncompressed_size ? entry.uncompressed_size : 1;
    Bytef *uncompressedData = (Bytef *)malloc(capacity);
    if (!uncompressedData)
    {
        (void)inflateEnd(&strm);
        return nil;
    }
    strm.avail_out = entry.uncompressed_size;
    strm.next_out = uncompressedData;

    int ret = inflate(&strm, Z_FINISH);
    uLong produced = strm.total_out;
    (void)inflateEnd(&strm);

    // Only Z_STREAM_END means the whole stream was decoded into the buffer.
    // Any other result (data error, truncated input, buffer too small, ...)
    // leaves part of the buffer unwritten, so nothing is returned then.
    NSData *returnBuffer = nil;
    if (ret == Z_STREAM_END)
        returnBuffer = [NSData dataWithBytes:(const void *)uncompressedData length:produced];
    free(uncompressedData);
    return returnBuffer;
}
340
341@implementation OOoSpotlightImporter
342
/* Builds the uti2kind table. The static flag makes every call after the
   first one a no-op. */
+ (void)initialize
{
    static BOOL isInitialized = NO;
    if (isInitialized)
        return;

    // Listed as value, key pairs: kind name first, then the UTI it belongs to.
    uti2kind = [[NSDictionary alloc] initWithObjectsAndKeys:
        @"OpenOffice.org 1.0 Text", @"org.openoffice.text",
        @"OpenDocument Text", @"org.oasis.opendocument.text",
        @"OpenOffice.org 1.0 Spreadsheet", @"org.openoffice.spreadsheet",
        @"OpenDocument Spreadsheet", @"org.oasis.opendocument.spreadsheet",
        @"OpenOffice.org 1.0 Presentation", @"org.openoffice.presentation",
        @"OpenDocument Presentation", @"org.oasis.opendocument.presentation",
        @"OpenOffice.org 1.0 Drawing", @"org.openoffice.graphics",
        @"OpenDocument Drawing", @"org.oasis.opendocument.graphics",
        @"OpenOffice.org 1.0 Master", @"org.openoffice.text-master",
        @"OpenDocument Master", @"org.oasis.opendocument.text-master",
        @"OpenOffice.org 1.0 Formula", @"org.openoffice.formula",
        @"OpenDocument Formula", @"org.oasis.opendocument.formula",
        @"OpenOffice.org 1.0 Text Template", @"org.openoffice.text-template",
        @"OpenDocument Text Template", @"org.oasis.opendocument.text-template",
        @"OpenOffice.org 1.0 Spreadsheet Template", @"org.openoffice.spreadsheet-template",
        @"OpenDocument Spreadsheet Template", @"org.oasis.opendocument.spreadsheet-template",
        @"OpenOffice.org 1.0 Presentation Template", @"org.openoffice.presentation-template",
        @"OpenDocument Presentation Template", @"org.oasis.opendocument.presentation-template",
        @"OpenOffice.org 1.0 Drawing Template", @"org.openoffice.graphics-template",
        @"OpenDocument Drawing Template", @"org.oasis.opendocument.graphics-template",
        @"OpenOffice.org 1.0 Database", @"org.openoffice.database",
        @"OpenDocument Chart", @"org.oasis.opendocument.chart",
        nil];

    isInitialized = YES;
}
378
/* importDocument is the real starting point for our plugin.
   Stores the kind name for contentTypeUTI (when the UTI is known) under
   kMDItemKind, then opens pathToFile as a ZIP archive and feeds first its
   meta.xml and then its content.xml to the respective parser, which add
   their findings to attributes.
   Returns NO only when the file cannot be opened as a ZIP archive. A missing
   meta.xml or content.xml ends the import early but still returns YES. The
   file handle is closed on every path. */
- (BOOL)importDocument:(NSString*)pathToFile contentType:(NSString*)contentTypeUTI attributes:(NSMutableDictionary*)attributes
{
    NSString *itemKind = [uti2kind objectForKey:contentTypeUTI];
    if (itemKind != nil) {
        [attributes setObject:itemKind forKey:(NSString*)kMDItemKind];
    }

    //first check to see if this is a valid zipped file
    NSFileHandle *unzipFile = [self openZipFileAtPath:pathToFile];
    if (unzipFile == nil) {
        return NO;
    }

    //first get the metadata
    NSData *metaData = [self metaDataFileFromZip:unzipFile];
    if (metaData == nil) {
        [unzipFile closeFile];
        return YES;
    }

    [metaData retain];

    OOoMetaDataParser *parser = [[OOoMetaDataParser alloc] init];
    if (parser != nil) {
        //parse and extract the data
        [parser parseXML:metaData intoDictionary:attributes];
    }

    [metaData release];
    [parser release];

    //and now get the content
    NSData *contentData = [self contentDataFileFromZip:unzipFile];
    if (contentData == nil) {
        [unzipFile closeFile];
        return YES;
    }

    [contentData retain];

    OOoContentDataParser *parser2 = [[OOoContentDataParser alloc] init];
    if (parser2 != nil) {
        //parse and extract the data
        [parser2 parseXML:contentData intoDictionary:attributes];
    }

    [contentData release];
    [parser2 release];

    [unzipFile closeFile];

    return YES;
}
439
/* openZipFileAtPath opens pathToFile for reading and returns the handle if
   isZipFile() accepts it. Returns nil for a nil or empty path, when the file
   cannot be opened, or when it is not accepted as a ZIP archive (the handle
   is closed again in that case). */
- (NSFileHandle*)openZipFileAtPath:(NSString*)pathToFile
{
    if ([pathToFile length] == 0)
        return nil;

    NSFileHandle *handle = [NSFileHandle fileHandleForReadingAtPath: pathToFile];
    if (handle == nil)
        return nil;

    if (isZipFile(handle))
        return handle;

    [handle closeFile];
    return nil;
}
465
/* metaDataFileFromZip returns the uncompressed contents of the archive member
   meta.xml, or nil when unzipFile is nil or the member cannot be extracted */
- (NSData*) metaDataFileFromZip:(NSFileHandle*)unzipFile
{
    return (unzipFile != nil) ? getUncompressedData(unzipFile, @"meta.xml") : nil;
}
474
/* contentDataFileFromZip returns the uncompressed contents of the archive
   member content.xml, or nil when unzipFile is nil or the member cannot be
   extracted */
- (NSData*) contentDataFileFromZip:(NSFileHandle*)unzipFile
{
    return (unzipFile != nil) ? getUncompressedData(unzipFile, @"content.xml") : nil;
}
483
484
485@end
486
487/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
constexpr sal_Int8 header[]
static unsigned char readByte(NSFileHandle *file)
static NSData * getUncompressedData(NSFileHandle *file, NSString *name)
#define CDIR_END_SIG
static bool isZipFile(NSFileHandle *file)
static unsigned short readShort(NSFileHandle *file)
static unsigned readInt(NSFileHandle *file)
static bool readCentralDirectoryEnd(NSFileHandle *file, CentralDirectoryEnd *end)
static bool areHeadersConsistent(const LocalFileHeader *header, const CentralDirectoryEntry *entry)
static bool readLocalFileHeader(NSFileHandle *file, LocalFileHeader *header)
static bool findCentralDirectoryEnd(NSFileHandle *file)
static bool readCentralDirectoryEntry(NSFileHandle *file, CentralDirectoryEntry *entry)
static bool findDataStream(NSFileHandle *file, CentralDirectoryEntry *entry, NSString *name)
static NSDictionary * uti2kind
#define LOC_FILE_HEADER_SIG
#define CDIR_ENTRY_SIG
double d
const char * name
void parseXML:intoDictionary:(NSData *data,[intoDictionary] NSMutableDictionary *dict)
void parseXML:intoDictionary:(NSData *data,[intoDictionary] NSMutableDictionary *dict)
end
parser
std::vector< sal_uInt8 > bytes
const wchar_t *typedef BOOL
SbxDecimal::CmpResult compare(SAL_UNUSED_PARAMETER const SbxDecimal &, SAL_UNUSED_PARAMETER const SbxDecimal &)
unsigned short disk_entries
unsigned short cdir_entries
unsigned short comment_size
unsigned short creator_version
unsigned short extra_field_size
unsigned short file_comment_size
unsigned short filename_size
unsigned short extra_field_size
unsigned short compression
unsigned short lastmod_time
unsigned short general_flag
unsigned short min_version
unsigned short lastmod_date