bsdiff.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /*-
  2. * Copyright 2003-2005 Colin Percival
  3. * Copyright 2012 Matthew Endsley
  4. * All rights reserved
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted providing that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  19. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  23. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  24. * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. * POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "bsdiff.h"
  28. #include <limits.h>
  29. #include <string.h>
  30. #ifndef MIN
  31. #define MIN(x,y) (((x)<(y)) ? (x) : (y))
  32. #endif
  33. static void split(int64_t *I,int64_t *V,int64_t start,int64_t len,int64_t h)
  34. {
  35. int64_t i,j,k,x,tmp,jj,kk;
  36. if(len<16) {
  37. for(k=start;k<start+len;k+=j) {
  38. j=1;x=V[I[k]+h];
  39. for(i=1;k+i<start+len;i++) {
  40. if(V[I[k+i]+h]<x) {
  41. x=V[I[k+i]+h];
  42. j=0;
  43. };
  44. if(V[I[k+i]+h]==x) {
  45. tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
  46. j++;
  47. };
  48. };
  49. for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
  50. if(j==1) I[k]=-1;
  51. };
  52. return;
  53. };
  54. x=V[I[start+len/2]+h];
  55. jj=0;kk=0;
  56. for(i=start;i<start+len;i++) {
  57. if(V[I[i]+h]<x) jj++;
  58. if(V[I[i]+h]==x) kk++;
  59. };
  60. jj+=start;kk+=jj;
  61. i=start;j=0;k=0;
  62. while(i<jj) {
  63. if(V[I[i]+h]<x) {
  64. i++;
  65. } else if(V[I[i]+h]==x) {
  66. tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
  67. j++;
  68. } else {
  69. tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
  70. k++;
  71. };
  72. };
  73. while(jj+j<kk) {
  74. if(V[I[jj+j]+h]==x) {
  75. j++;
  76. } else {
  77. tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
  78. k++;
  79. };
  80. };
  81. if(jj>start) split(I,V,start,jj-start,h);
  82. for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
  83. if(jj==kk-1) I[jj]=-1;
  84. if(start+len>kk) split(I,V,kk,start+len-kk,h);
  85. }
  86. static void qsufsort(int64_t *I,int64_t *V,const uint8_t *old,int64_t oldsize)
  87. {
  88. int64_t buckets[256];
  89. int64_t i,h,len;
  90. for(i=0;i<256;i++) buckets[i]=0;
  91. for(i=0;i<oldsize;i++) buckets[old[i]]++;
  92. for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
  93. for(i=255;i>0;i--) buckets[i]=buckets[i-1];
  94. buckets[0]=0;
  95. for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
  96. I[0]=oldsize;
  97. for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
  98. V[oldsize]=0;
  99. for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
  100. I[0]=-1;
  101. for(h=1;I[0]!=-(oldsize+1);h+=h) {
  102. len=0;
  103. for(i=0;i<oldsize+1;) {
  104. if(I[i]<0) {
  105. len-=I[i];
  106. i-=I[i];
  107. } else {
  108. if(len) I[i-len]=-len;
  109. len=V[I[i]]+1-i;
  110. split(I,V,i,len,h);
  111. i+=len;
  112. len=0;
  113. };
  114. };
  115. if(len) I[i-len]=-len;
  116. };
  117. for(i=0;i<oldsize+1;i++) I[V[i]]=i;
  118. }
  119. static int64_t matchlen(const uint8_t *old,int64_t oldsize,const uint8_t *new,int64_t newsize)
  120. {
  121. int64_t i;
  122. for(i=0;(i<oldsize)&&(i<newsize);i++)
  123. if(old[i]!=new[i]) break;
  124. return i;
  125. }
  126. static int64_t search(const int64_t *I,const uint8_t *old,int64_t oldsize,
  127. const uint8_t *new,int64_t newsize,int64_t st,int64_t en,int64_t *pos)
  128. {
  129. int64_t x,y;
  130. if(en-st<2) {
  131. x=matchlen(old+I[st],oldsize-I[st],new,newsize);
  132. y=matchlen(old+I[en],oldsize-I[en],new,newsize);
  133. if(x>y) {
  134. *pos=I[st];
  135. return x;
  136. } else {
  137. *pos=I[en];
  138. return y;
  139. }
  140. };
  141. x=st+(en-st)/2;
  142. if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
  143. return search(I,old,oldsize,new,newsize,x,en,pos);
  144. } else {
  145. return search(I,old,oldsize,new,newsize,st,x,pos);
  146. };
  147. }
  148. static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64_t length)
  149. {
  150. int64_t result = 0;
  151. while (length > 0)
  152. {
  153. const int smallsize = (int)MIN(length, INT_MAX);
  154. const int writeresult = stream->write(stream, buffer, smallsize);
  155. if (writeresult == -1)
  156. {
  157. return -1;
  158. }
  159. result += writeresult;
  160. length -= smallsize;
  161. buffer = (uint8_t*)buffer + smallsize;
  162. }
  163. return result;
  164. }
  165. struct bsdiff_request
  166. {
  167. const uint8_t* old;
  168. int64_t oldsize;
  169. const uint8_t* new;
  170. int64_t newsize;
  171. struct bsdiff_stream* stream;
  172. int64_t *I;
  173. uint8_t *buffer;
  174. };
  175. static int bsdiff_internal(const struct bsdiff_request req)
  176. {
  177. int64_t *I,*V;
  178. int64_t scan,pos,len;
  179. int64_t lastscan,lastpos,lastoffset;
  180. int64_t oldscore,scsc;
  181. int64_t s,Sf,lenf,Sb,lenb;
  182. int64_t overlap,Ss,lens;
  183. int64_t i;
  184. uint8_t *buffer;
  185. uint8_t buf[8 * 3];
  186. if((V=req.stream->malloc((req.oldsize+1)*sizeof(int64_t)))==NULL) return -1;
  187. I = req.I;
  188. qsufsort(I,V,req.old,req.oldsize);
  189. req.stream->free(V);
  190. buffer = req.buffer;
  191. /* Compute the differences, writing ctrl as we go */
  192. scan=0;len=0;pos=0;
  193. lastscan=0;lastpos=0;lastoffset=0;
  194. while(scan<req.newsize) {
  195. oldscore=0;
  196. for(scsc=scan+=len;scan<req.newsize;scan++) {
  197. len=search(I,req.old,req.oldsize,req.new+scan,req.newsize-scan,
  198. 0,req.oldsize,&pos);
  199. for(;scsc<scan+len;scsc++)
  200. if((scsc+lastoffset<req.oldsize) &&
  201. (req.old[scsc+lastoffset] == req.new[scsc]))
  202. oldscore++;
  203. if(((len==oldscore) && (len!=0)) ||
  204. (len>oldscore+8)) break;
  205. if((scan+lastoffset<req.oldsize) &&
  206. (req.old[scan+lastoffset] == req.new[scan]))
  207. oldscore--;
  208. };
  209. if((len!=oldscore) || (scan==req.newsize)) {
  210. s=0;Sf=0;lenf=0;
  211. for(i=0;(lastscan+i<scan)&&(lastpos+i<req.oldsize);) {
  212. if(req.old[lastpos+i]==req.new[lastscan+i]) s++;
  213. i++;
  214. if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
  215. };
  216. lenb=0;
  217. if(scan<req.newsize) {
  218. s=0;Sb=0;
  219. for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
  220. if(req.old[pos-i]==req.new[scan-i]) s++;
  221. if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
  222. };
  223. };
  224. if(lastscan+lenf>scan-lenb) {
  225. overlap=(lastscan+lenf)-(scan-lenb);
  226. s=0;Ss=0;lens=0;
  227. for(i=0;i<overlap;i++) {
  228. if(req.new[lastscan+lenf-overlap+i]==
  229. req.old[lastpos+lenf-overlap+i]) s++;
  230. if(req.new[scan-lenb+i]==
  231. req.old[pos-lenb+i]) s--;
  232. if(s>Ss) { Ss=s; lens=i+1; };
  233. };
  234. lenf+=lens-overlap;
  235. lenb-=lens;
  236. };
  237. offtout(lenf,buf);
  238. offtout((scan-lenb)-(lastscan+lenf),buf+8);
  239. offtout((pos-lenb)-(lastpos+lenf),buf+16);
  240. /* Write control data */
  241. if (writedata(req.stream, buf, sizeof(buf)))
  242. return -1;
  243. /* Write diff data */
  244. for(i=0;i<lenf;i++)
  245. buffer[i]=req.new[lastscan+i]-req.old[lastpos+i];
  246. if (writedata(req.stream, buffer, lenf))
  247. return -1;
  248. /* Write extra data */
  249. for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
  250. buffer[i]=req.new[lastscan+lenf+i];
  251. if (writedata(req.stream, buffer, (scan-lenb)-(lastscan+lenf)))
  252. return -1;
  253. lastscan=scan-lenb;
  254. lastpos=pos-lenb;
  255. lastoffset=pos-scan;
  256. };
  257. };
  258. return 0;
  259. }
  260. TOOLKIT_API int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* newone, int64_t newsize, struct bsdiff_stream* diffstream)
  261. {
  262. int result;
  263. struct bsdiff_request req;
  264. if((req.I= diffstream->malloc((oldsize+1)*sizeof(int64_t)))==NULL)
  265. return -1;
  266. if((req.buffer= diffstream->malloc(newsize+1))==NULL)
  267. {
  268. diffstream->free(req.I);
  269. return -1;
  270. }
  271. req.old = old;
  272. req.oldsize = oldsize;
  273. req.new = newone;
  274. req.newsize = newsize;
  275. req.stream = diffstream;
  276. result = bsdiff_internal(req);
  277. diffstream->free(req.buffer);
  278. diffstream->free(req.I);
  279. return result;
  280. }