123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- /*-
- * Copyright 2003-2005 Colin Percival
- * Copyright 2012 Matthew Endsley
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted providing that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
#include "bsdiff.h"

#include <limits.h>
#include <stdint.h>
#include <string.h>
/*
 * Smaller of two values. Defined only when no MIN is already in scope.
 * Each argument may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
/*
 * Refine the ordering of the index range I[start .. start+len) using the
 * key V[I[x] + h] of each entry.
 *
 * After the call the range is ordered by ascending key. For every group of
 * entries sharing a key, V[I[x]] is set to the index of the last slot of
 * that group, and a group consisting of a single entry has its I slot
 * overwritten with -1.
 *
 * Ranges shorter than 16 entries are handled by repeatedly pulling the
 * minimum-key group to the front; longer ranges are three-way partitioned
 * around the key of the middle entry and the outer parts are handled
 * recursively.
 */
static void split(int64_t *I, int64_t *V, int64_t start, int64_t len, int64_t h)
{
	const int64_t end = start + len;
	int64_t pivot, key, held;
	int64_t lt_end, eq_end;
	int64_t cur, eq_n, gt_n;

	if (len < 16) {
		int64_t base = start;

		while (base < end) {
			/* Number of entries currently known to share the minimum key. */
			int64_t group = 1;
			int64_t off;

			pivot = V[I[base] + h];
			for (off = 1; base + off < end; off++) {
				key = V[I[base + off] + h];
				if (key < pivot) {
					/* New minimum: restart the group at the front. */
					pivot = key;
					group = 0;
				}
				if (key == pivot) {
					held = I[base + group];
					I[base + group] = I[base + off];
					I[base + off] = held;
					group++;
				}
			}

			/* Label the whole group with the index of its last slot. */
			for (off = 0; off < group; off++)
				V[I[base + off]] = base + group - 1;
			if (group == 1)
				I[base] = -1;

			base += group;
		}
		return;
	}

	/* Count entries below and equal to the pivot to find region bounds. */
	pivot = V[I[start + len / 2] + h];
	lt_end = 0;
	eq_end = 0;
	for (cur = start; cur < end; cur++) {
		key = V[I[cur] + h];
		if (key < pivot)
			lt_end++;
		else if (key == pivot)
			eq_end++;
	}
	lt_end += start;
	eq_end += lt_end;

	/* Fill the "less than" region, pushing other entries to their regions. */
	cur = start;
	eq_n = 0;
	gt_n = 0;
	while (cur < lt_end) {
		key = V[I[cur] + h];
		if (key < pivot) {
			cur++;
			continue;
		}
		held = I[cur];
		if (key == pivot) {
			I[cur] = I[lt_end + eq_n];
			I[lt_end + eq_n] = held;
			eq_n++;
		} else {
			I[cur] = I[eq_end + gt_n];
			I[eq_end + gt_n] = held;
			gt_n++;
		}
	}

	/* Finish the "equal" region; anything else belongs after it. */
	while (lt_end + eq_n < eq_end) {
		const int64_t at = lt_end + eq_n;

		if (V[I[at] + h] == pivot) {
			eq_n++;
		} else {
			held = I[at];
			I[at] = I[eq_end + gt_n];
			I[eq_end + gt_n] = held;
			gt_n++;
		}
	}

	if (lt_end > start)
		split(I, V, start, lt_end - start, h);

	/* Label the pivot group; a lone entry is marked as finished. */
	for (cur = lt_end; cur < eq_end; cur++)
		V[I[cur]] = eq_end - 1;
	if (lt_end == eq_end - 1)
		I[lt_end] = -1;

	if (end > eq_end)
		split(I, V, eq_end, end - eq_end, h);
}
/*
 * Build the suffix ordering of old[0 .. oldsize) in I.
 *
 * I and V must each have room for oldsize + 1 entries; V is scratch space
 * whose contents are only meaningful during the sort. On return I[r] holds
 * the start offset of the suffix with rank r (the final loop inverts V
 * into I).
 *
 * While sorting, a negative value -n stored in I marks the start of a run
 * of n slots that are already in their final position; the sort is done
 * once the whole array is a single such run.
 */
static void qsufsort(int64_t *I, int64_t *V, const uint8_t *old, int64_t oldsize)
{
	const int64_t total = oldsize + 1;
	int64_t buckets[256] = {0};
	int64_t idx, h;

	/* Histogram of byte values, turned into exclusive prefix sums. */
	for (idx = 0; idx < oldsize; idx++)
		buckets[old[idx]]++;
	for (idx = 1; idx < 256; idx++)
		buckets[idx] += buckets[idx - 1];
	for (idx = 255; idx > 0; idx--)
		buckets[idx] = buckets[idx - 1];
	buckets[0] = 0;

	/* Bucket the offsets by first byte; slot 0 belongs to the empty suffix. */
	for (idx = 0; idx < oldsize; idx++) {
		const uint8_t byte = old[idx];

		buckets[byte] += 1;
		I[buckets[byte]] = idx;
	}
	I[0] = oldsize;

	/* Each offset's initial group label is the last slot of its bucket. */
	for (idx = 0; idx < oldsize; idx++)
		V[idx] = buckets[old[idx]];
	V[oldsize] = 0;

	/* Buckets holding exactly one offset are already final. */
	for (idx = 1; idx < 256; idx++) {
		if (buckets[idx] == buckets[idx - 1] + 1)
			I[buckets[idx]] = -1;
	}
	I[0] = -1;

	/* Double the comparison depth h until everything is one sorted run. */
	h = 1;
	while (I[0] != -total) {
		int64_t sorted_run = 0;

		idx = 0;
		while (idx < total) {
			if (I[idx] < 0) {
				/* Skip over an already-sorted run, accumulating its length. */
				sorted_run -= I[idx];
				idx -= I[idx];
			} else {
				int64_t group_len;

				/* Record the merged length of the sorted runs just skipped. */
				if (sorted_run != 0)
					I[idx - sorted_run] = -sorted_run;
				group_len = V[I[idx]] + 1 - idx;
				split(I, V, idx, group_len, h);
				idx += group_len;
				sorted_run = 0;
			}
		}
		if (sorted_run != 0)
			I[idx - sorted_run] = -sorted_run;

		h += h;
	}

	/* V now maps offset -> rank; invert it to get rank -> offset. */
	for (idx = 0; idx < total; idx++)
		I[V[idx]] = idx;
}
/*
 * Length of the common prefix of old[0 .. oldsize) and new[0 .. newsize).
 * Returns 0 when either size is zero or negative.
 */
static int64_t matchlen(const uint8_t *old, int64_t oldsize, const uint8_t *new, int64_t newsize)
{
	const int64_t limit = (oldsize < newsize) ? oldsize : newsize;
	int64_t matched = 0;

	while (matched < limit && old[matched] == new[matched])
		matched++;

	return matched;
}
/*
 * Binary-search the suffix ordering I over the index range [st, en] for the
 * suffix of old that shares the longest prefix with new[0 .. newsize).
 *
 * The range is halved by comparing new against the suffix at the midpoint
 * until at most two candidates remain; the better of I[st] and I[en] wins
 * (I[en] on a tie). The chosen offset into old is stored in *pos and the
 * length of the match is returned.
 */
static int64_t search(const int64_t *I, const uint8_t *old, int64_t oldsize,
		const uint8_t *new, int64_t newsize, int64_t st, int64_t en, int64_t *pos)
{
	int64_t len_lo, len_hi;

	while (en - st >= 2) {
		const int64_t mid = st + (en - st) / 2;

		if (memcmp(old + I[mid], new, MIN(oldsize - I[mid], newsize)) < 0)
			st = mid;
		else
			en = mid;
	}

	len_lo = matchlen(old + I[st], oldsize - I[st], new, newsize);
	len_hi = matchlen(old + I[en], oldsize - I[en], new, newsize);

	if (len_lo > len_hi) {
		*pos = I[st];
		return len_lo;
	}

	*pos = I[en];
	return len_hi;
}
- static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64_t length)
- {
- int64_t result = 0;
- while (length > 0)
- {
- const int smallsize = (int)MIN(length, INT_MAX);
- const int writeresult = stream->write(stream, buffer, smallsize);
- if (writeresult == -1)
- {
- return -1;
- }
- result += writeresult;
- length -= smallsize;
- buffer = (uint8_t*)buffer + smallsize;
- }
- return result;
- }
/*
 * Everything bsdiff_internal needs for one diff run, bundled so it can be
 * passed as a single argument.
 */
struct bsdiff_request
{
	const uint8_t *old;           /* original data */
	int64_t oldsize;              /* number of bytes in old */
	const uint8_t *new;           /* updated data */
	int64_t newsize;              /* number of bytes in new */
	struct bsdiff_stream *stream; /* supplies the malloc/free/write callbacks */
	int64_t *I;                   /* suffix ordering storage, oldsize + 1 entries */
	uint8_t *buffer;              /* scratch for diff/extra bytes, newsize + 1 bytes */
};
- static int bsdiff_internal(const struct bsdiff_request req)
- {
- int64_t *I,*V;
- int64_t scan,pos,len;
- int64_t lastscan,lastpos,lastoffset;
- int64_t oldscore,scsc;
- int64_t s,Sf,lenf,Sb,lenb;
- int64_t overlap,Ss,lens;
- int64_t i;
- uint8_t *buffer;
- uint8_t buf[8 * 3];
- if((V=req.stream->malloc((req.oldsize+1)*sizeof(int64_t)))==NULL) return -1;
- I = req.I;
- qsufsort(I,V,req.old,req.oldsize);
- req.stream->free(V);
- buffer = req.buffer;
- /* Compute the differences, writing ctrl as we go */
- scan=0;len=0;pos=0;
- lastscan=0;lastpos=0;lastoffset=0;
- while(scan<req.newsize) {
- oldscore=0;
- for(scsc=scan+=len;scan<req.newsize;scan++) {
- len=search(I,req.old,req.oldsize,req.new+scan,req.newsize-scan,
- 0,req.oldsize,&pos);
- for(;scsc<scan+len;scsc++)
- if((scsc+lastoffset<req.oldsize) &&
- (req.old[scsc+lastoffset] == req.new[scsc]))
- oldscore++;
- if(((len==oldscore) && (len!=0)) ||
- (len>oldscore+8)) break;
- if((scan+lastoffset<req.oldsize) &&
- (req.old[scan+lastoffset] == req.new[scan]))
- oldscore--;
- };
- if((len!=oldscore) || (scan==req.newsize)) {
- s=0;Sf=0;lenf=0;
- for(i=0;(lastscan+i<scan)&&(lastpos+i<req.oldsize);) {
- if(req.old[lastpos+i]==req.new[lastscan+i]) s++;
- i++;
- if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
- };
- lenb=0;
- if(scan<req.newsize) {
- s=0;Sb=0;
- for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
- if(req.old[pos-i]==req.new[scan-i]) s++;
- if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
- };
- };
- if(lastscan+lenf>scan-lenb) {
- overlap=(lastscan+lenf)-(scan-lenb);
- s=0;Ss=0;lens=0;
- for(i=0;i<overlap;i++) {
- if(req.new[lastscan+lenf-overlap+i]==
- req.old[lastpos+lenf-overlap+i]) s++;
- if(req.new[scan-lenb+i]==
- req.old[pos-lenb+i]) s--;
- if(s>Ss) { Ss=s; lens=i+1; };
- };
- lenf+=lens-overlap;
- lenb-=lens;
- };
- offtout(lenf,buf);
- offtout((scan-lenb)-(lastscan+lenf),buf+8);
- offtout((pos-lenb)-(lastpos+lenf),buf+16);
- /* Write control data */
- if (writedata(req.stream, buf, sizeof(buf)))
- return -1;
- /* Write diff data */
- for(i=0;i<lenf;i++)
- buffer[i]=req.new[lastscan+i]-req.old[lastpos+i];
- if (writedata(req.stream, buffer, lenf))
- return -1;
- /* Write extra data */
- for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
- buffer[i]=req.new[lastscan+lenf+i];
- if (writedata(req.stream, buffer, (scan-lenb)-(lastscan+lenf)))
- return -1;
- lastscan=scan-lenb;
- lastpos=pos-lenb;
- lastoffset=pos-scan;
- };
- };
- return 0;
- }
- TOOLKIT_API int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* newone, int64_t newsize, struct bsdiff_stream* diffstream)
- {
- int result;
- struct bsdiff_request req;
- if((req.I= diffstream->malloc((oldsize+1)*sizeof(int64_t)))==NULL)
- return -1;
- if((req.buffer= diffstream->malloc(newsize+1))==NULL)
- {
- diffstream->free(req.I);
- return -1;
- }
- req.old = old;
- req.oldsize = oldsize;
- req.new = newone;
- req.newsize = newsize;
- req.stream = diffstream;
- result = bsdiff_internal(req);
- diffstream->free(req.buffer);
- diffstream->free(req.I);
- return result;
- }
|