LibreOffice Module onlineupdate (master) 1
bsdiff.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 bsdiff.c -- Binary patch generator.
4
5 Copyright 2003 Colin Percival
6
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
9
10 ChangeLog:
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12 values throughout.
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15 provided by libbz2.
16 --Darin Fisher <darin@meer.net>
17*/
18
19#include "bspatch.h"
20
21#include <stdlib.h>
22#include <stdio.h>
23#include <string.h>
24#include <fcntl.h>
25#include <stdarg.h>
26#ifdef _WIN32
27#include <io.h>
28#include <winsock2.h>
29#else
30#include <unistd.h>
31#include <arpa/inet.h>
32#define _O_BINARY 0
33#endif
34
35#undef MIN
36#define MIN(x,y) (((x)<(y)) ? (x) : (y))
37
38/*---------------------------------------------------------------------------*/
39
40/* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
41 * declare it here to avoid including that entire header file.
42 */
43extern "C" unsigned int BZ2_crc32Table[256];
44
45static unsigned int
46crc32(const unsigned char *buf, unsigned int len)
47{
48 unsigned int crc = 0xffffffffL;
49
50 const unsigned char *end = buf + len;
51 for (; buf != end; ++buf)
52 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
53
54 crc = ~crc;
55 return crc;
56}
57
58/*---------------------------------------------------------------------------*/
59
60static void
61reporterr(int e, const char *fmt, ...)
62{
63 if (fmt) {
64 va_list args;
65 va_start(args, fmt);
66 vfprintf(stderr, fmt, args);
67 va_end(args);
68 }
69
70 exit(e);
71}
72
73static void
74split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
75{
76 int32_t i,j,k,x,tmp,jj,kk;
77
78 if(len<16) {
79 for(k=start;k<start+len;k+=j) {
80 j=1;x=V[I[k]+h];
81 for(i=1;k+i<start+len;i++) {
82 if(V[I[k+i]+h]<x) {
83 x=V[I[k+i]+h];
84 j=0;
85 };
86 if(V[I[k+i]+h]==x) {
87 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
88 j++;
89 };
90 };
91 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
92 if(j==1) I[k]=-1;
93 };
94 return;
95 };
96
97 x=V[I[start+len/2]+h];
98 jj=0;kk=0;
99 for(i=start;i<start+len;i++) {
100 if(V[I[i]+h]<x) jj++;
101 if(V[I[i]+h]==x) kk++;
102 };
103 jj+=start;kk+=jj;
104
105 i=start;j=0;k=0;
106 while(i<jj) {
107 if(V[I[i]+h]<x) {
108 i++;
109 } else if(V[I[i]+h]==x) {
110 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
111 j++;
112 } else {
113 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
114 k++;
115 };
116 };
117
118 while(jj+j<kk) {
119 if(V[I[jj+j]+h]==x) {
120 j++;
121 } else {
122 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
123 k++;
124 };
125 };
126
127 if(jj>start) split(I,V,start,jj-start,h);
128
129 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
130 if(jj==kk-1) I[jj]=-1;
131
132 if(start+len>kk) split(I,V,kk,start+len-kk,h);
133}
134
135static void
136qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
137{
138 int32_t buckets[256];
139 int32_t i,h,len;
140
141 for(i=0;i<256;i++) buckets[i]=0;
142 for(i=0;i<oldsize;i++) buckets[old[i]]++;
143 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
144 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
145 buckets[0]=0;
146
147 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
148 I[0]=oldsize;
149 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
150 V[oldsize]=0;
151 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
152 I[0]=-1;
153
154 for(h=1;I[0]!=-(oldsize+1);h+=h) {
155 len=0;
156 for(i=0;i<oldsize+1;) {
157 if(I[i]<0) {
158 len-=I[i];
159 i-=I[i];
160 } else {
161 if(len) I[i-len]=-len;
162 len=V[I[i]]+1-i;
163 split(I,V,i,len,h);
164 i+=len;
165 len=0;
166 };
167 };
168 if(len) I[i-len]=-len;
169 };
170
171 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
172}
173
174static int32_t
175matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
176{
177 int32_t i;
178
179 for(i=0;(i<oldsize)&&(i<newsize);i++)
180 if(old[i]!=newbuf[i]) break;
181
182 return i;
183}
184
185static int32_t
186search(int32_t *I,unsigned char *old,int32_t oldsize,
187 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
188{
189 int32_t x,y;
190
191 if(en-st<2) {
192 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
193 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
194
195 if(x>y) {
196 *pos=I[st];
197 return x;
198 } else {
199 *pos=I[en];
200 return y;
201 }
202 };
203
204 x=st+(en-st)/2;
205 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
206 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
207 } else {
208 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
209 };
210}
211
212int main(int argc,char *argv[])
213{
214 int fd;
215 unsigned char *old = nullptr,*newbuf = nullptr;
216 int32_t oldsize = 0, newsize = 0;
217 int32_t *I = nullptr,*V = nullptr;
218
219 int32_t scan,pos = 0,len;
220 int32_t lastscan,lastpos,lastoffset;
221 int32_t oldscore,scsc;
222
223 int32_t s,Sf,lenf,Sb,lenb;
224 int32_t overlap,Ss,lens;
225 int32_t i;
226
227 int32_t dblen,eblen;
228 unsigned char *db,*eb = nullptr;
229
230 unsigned int scrc;
231
233 {'M','B','D','I','F','F','1','0'},
234 0, 0, 0, 0, 0, 0
235 };
236
237 uint32_t numtriples;
238
239 if(argc!=4)
240 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
241
242 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
243 that we never try to malloc(0) and get a NULL pointer */
244 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
245 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
246 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
247 (lseek(fd,0,SEEK_SET)!=0) ||
248 (read(fd,old,oldsize)!=oldsize) ||
249 (close(fd)==-1))
250 reporterr(1,"%s\n",argv[1]);
251
252 scrc = crc32(old, oldsize);
253
254 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
255 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
256 reporterr(1,NULL);
257
258 qsufsort(I,V,old,oldsize);
259
260 free(V);
261
262 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
263 that we never try to malloc(0) and get a NULL pointer */
264 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
265 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
266 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
267 (lseek(fd,0,SEEK_SET)!=0) ||
268 (read(fd,newbuf,newsize)!=newsize) ||
269 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
270
271 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
272 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
273 reporterr(1,NULL);
274
275 dblen=0;
276 eblen=0;
277
278 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
279 reporterr(1,"%s\n",argv[3]);
280
281 /* start writing here */
282
283 /* We don't know the lengths yet, so we will write the header again
284 at the end */
285
286 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
287 reporterr(1,"%s\n",argv[3]);
288
289 scan=0;len=0;
290 lastscan=0;lastpos=0;lastoffset=0;
291 numtriples = 0;
292 while(scan<newsize) {
293 oldscore=0;
294
295 for(scsc=scan+=len;scan<newsize;scan++) {
296 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
297 0,oldsize,&pos);
298
299 for(;scsc<scan+len;scsc++)
300 if((scsc+lastoffset<oldsize) &&
301 (old[scsc+lastoffset] == newbuf[scsc]))
302 oldscore++;
303
304 if(((len==oldscore) && (len!=0)) ||
305 (len>oldscore+8)) break;
306
307 if((scan+lastoffset<oldsize) &&
308 (old[scan+lastoffset] == newbuf[scan]))
309 oldscore--;
310 };
311
312 if((len!=oldscore) || (scan==newsize)) {
313 MBSPatchTriple triple;
314
315 s=0;Sf=0;lenf=0;
316 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
317 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
318 i++;
319 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
320 };
321
322 lenb=0;
323 if(scan<newsize) {
324 s=0;Sb=0;
325 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
326 if(old[pos-i]==newbuf[scan-i]) s++;
327 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
328 };
329 };
330
331 if(lastscan+lenf>scan-lenb) {
332 overlap=(lastscan+lenf)-(scan-lenb);
333 s=0;Ss=0;lens=0;
334 for(i=0;i<overlap;i++) {
335 if(newbuf[lastscan+lenf-overlap+i]==
336 old[lastpos+lenf-overlap+i]) s++;
337 if(newbuf[scan-lenb+i]==
338 old[pos-lenb+i]) s--;
339 if(s>Ss) { Ss=s; lens=i+1; };
340 };
341
342 lenf+=lens-overlap;
343 lenb-=lens;
344 };
345
346 for(i=0;i<lenf;i++)
347 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
348 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
349 eb[eblen+i]=newbuf[lastscan+lenf+i];
350
351 dblen+=lenf;
352 eblen+=(scan-lenb)-(lastscan+lenf);
353
354 triple.x = htonl(lenf);
355 triple.y = htonl((scan-lenb)-(lastscan+lenf));
356 triple.z = htonl((pos-lenb)-(lastpos+lenf));
357 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
358 reporterr(1,NULL);
359
360#ifdef DEBUG_bsmedberg
361 printf("Writing a block:\n"
362 " X: %u\n"
363 " Y: %u\n"
364 " Z: %i\n",
365 (uint32_t) lenf,
366 (uint32_t) ((scan-lenb)-(lastscan+lenf)),
367 (uint32_t) ((pos-lenb)-(lastpos+lenf)));
368#endif
369
370 ++numtriples;
371
372 lastscan=scan-lenb;
373 lastpos=pos-lenb;
374 lastoffset=pos-scan;
375 };
376 };
377
378 if(write(fd,db,dblen)!=dblen)
379 reporterr(1,NULL);
380
381 if(write(fd,eb,eblen)!=eblen)
382 reporterr(1,NULL);
383
384 header.slen = htonl(oldsize);
385 header.scrc32 = htonl(scrc);
386 header.dlen = htonl(newsize);
387 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
388 header.difflen = htonl(dblen);
389 header.extralen = htonl(eblen);
390
391 if (lseek(fd,0,SEEK_SET) == -1 ||
392 write(fd,&header,sizeof(header)) != sizeof(header) ||
393 close(fd) == -1)
394 reporterr(1,NULL);
395
396 free(db);
397 free(eb);
398 free(I);
399 free(old);
400 free(newbuf);
401
402 return 0;
403}
404
405/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
constexpr sal_Int8 header[]
int main(int argc, char *argv[])
Definition: bsdiff.cxx:212
static void qsufsort(int32_t *I, int32_t *V, unsigned char *old, int32_t oldsize)
Definition: bsdiff.cxx:136
#define _O_BINARY
Definition: bsdiff.cxx:32
#define MIN(x, y)
Definition: bsdiff.cxx:36
static int32_t search(int32_t *I, unsigned char *old, int32_t oldsize, unsigned char *newbuf, int32_t newsize, int32_t st, int32_t en, int32_t *pos)
Definition: bsdiff.cxx:186
static void split(int32_t *I, int32_t *V, int32_t start, int32_t len, int32_t h)
Definition: bsdiff.cxx:74
static unsigned int crc32(const unsigned char *buf, unsigned int len)
Definition: bsdiff.cxx:46
unsigned int BZ2_crc32Table[256]
Definition: bsdiff.cxx:43
static int32_t matchlen(unsigned char *old, int32_t oldsize, unsigned char *newbuf, int32_t newsize)
Definition: bsdiff.cxx:175
static void reporterr(int e, const char *fmt,...)
Definition: bsdiff.cxx:61
bool close
float y
float x
return NULL
int i
end
args
sal_uInt32 htonl(sal_uInt32 h)
sal_Int32 h
int32_t z
Definition: bspatch.h:92
uint32_t y
Definition: bspatch.h:91
uint32_t x
Definition: bspatch.h:90
size_t pos
const sal_uInt8 V