/*
 * $Id$
 * gzip.i
 * interpreted gzip and tar packer/unpacker using yorick-z zlib.i package
 *
 * gzip format references:
 *   ftp://ftp.uu.net/pub/archiving/zip/doc/
 *   ftp://ftp.uu.net/graphics/png/documents/zlib/INDEX.html
 * tar format reference:
 *   http://www.delorie.com/gnu/docs/tar/tar_120.html
 */
/* Copyright (c) 2005, David H. Munro
 * All rights reserved.
 * This file is part of yorick (http://yorick.sourceforge.net).
 * This is open source software, distributed under a BSD license.
 */

func gzip(file, outname, bufsize=, level=)
/* DOCUMENT gzip, filename
         or gzip, filename, gzname

     Compress FILENAME to produce a gzip file.  By default, the compressed
     file name will be FILENAME+".gz", but if you supply the GZNAME, that
     will be the output file name.  FILENAME may also be an already open
     binary stream.

     If FILENAME is a directory, the directory will be tarred first,
     and the default output name will be FILENAME+".tgz".  This
     function does not work as well as the tar utility; the stored
     modification time will be the approximate time the gzip command
     ran, not the actual modification time of the file.  The file
     permissions will always be 644 for files, and 755 for
     directories, except for the specially recognized extensions
     ".so", ".sl", ".sh", ".dll", ".exe", and ".dylib", and the special
     filename "configure", which will be given 755 permissions.  Soft
     links and hard links will become copies.  Zero length or unreadable
     files are skipped with a warning.  In summary, this directory
     archiving feature is for light duty only!

     The gzip function accepts a bufsize= keyword (default 1 Mbyte,
     rounded up to a multiple of 512 bytes), and a level= keyword
     (0-9, as for the gzip utility, with 9 the highest compression
     and slowest speed).

   SEE ALSO: gunzip, untar
 */
{
  require, "zlib.i";

  if (!is_stream(file)) {
    local dlist;
    flist = lsdir(file, dlist);
    /* treat backslashes as slashes while locating path separators */
    cname = *pointer(file);
    list = where(cname == '\\');
    if (numberof(list)) cname(list) = '/';
    list = where(cname == '/');
    if (is_void(flist) || structof(flist)==string) {
      /* this is a directory */
      if (is_void(outname)) outname = file + ".tgz";
      notar = 0;
      /* NOTE(review): for a nested path such as "a/b" topname becomes the
       * parent "a/", while member files are stored under "a/b/..." --
       * confirm that this is the intended directory entry name
       */
      if (numberof(list)) {
        if (numberof(list)>1 && list(0)==strlen(file))
          topname = strpart(file, 1:list(-1));
        else
          topname = strpart(file, 1:list(0));
      } else {
        topname = file + "/";
      }
      /* drop drive letter and leading slash from the archived name */
      if (strpart(topname,2:2) == ":") topname = strpart(topname,3:0);
      if (strpart(topname,1:1) == "/") topname = strpart(topname,2:0);
      last = strpart(file,0:0);
      if (last!="\\" && last!="/") file = file + "/";
    } else {
      /* this is a regular file */
      inname = file;
      if (is_void(outname)) outname = file + ".gz";
      file = open(file, "rb");
      notar = 1;
      /* only the final path component is stored in the gzip header */
      if (numberof(list)) inname = string(&cname(list(0)+1:0));
    }
  } else {
    /* regular file, already open -- recover its name from print(file) */
    inname = strpart(strtok(print(file)(1), ":")(2), 2:0);
    if (is_void(outname)) outname = inname + ".gz";
    notar = 1;
  }

  /* create output file and write gzip header */
  out = open(outname, "wb");
  oaddr = 0;
  head = ['\037', '\213',                  /* magic */
          '\010', (notar? '\010' : '\0'),  /* cm, fname flag */
          '\0', '\0', '\0', '\0',          /* mtime (0 unavailable) */
          '\0', '\377'];                   /* xfl, os (unknown) */
  _write, out, oaddr, head;
  oaddr += numberof(head);
  if (notar) {
    /* original file name, including its terminating NUL */
    cname = *pointer(inname);
    _write, out, oaddr, cname;
    oaddr += numberof(cname);
  }

  buf = z_deflate(level);
  crc = [];
  /* a non-positive bufsize would never advance the loops below */
  if (is_void(bufsize) || bufsize<=0) bufsize = 0x100000;
  else bufsize = (bufsize & ~511) + ((bufsize&511)? 512 : 0);
  addr = 0;

  if (notar) {
    /* compress single file */
    len = sizeof(file);
    bufsize = min(bufsize, len);
    body = array(char, bufsize);
    while (addr+bufsize < len) {
      _read, file, addr, body;
      if (!addr && allof(body(1:2)==['\037','\213'])) {
        close, out;
        remove, outname+"L";
        error, "input file "+inname+" already compressed";
      }
      crc = z_crc32(crc, body);
      if (z_deflate(buf, body) > 0) {
        data = z_flush(buf);
        /* first output carries the two byte zlib header, not part of gzip */
        if (!addr) data = data(3:0);
        _write, out, oaddr, data;
        oaddr += numberof(data);
      }
      addr += bufsize;
    }
    if (addr+bufsize > len) body = array(char, len-addr);
    _read, file, addr, body;
    /* same already-compressed check when the whole file fits in one buffer */
    if (!addr && numberof(body)>=2 && allof(body(1:2)==['\037','\213'])) {
      close, out;
      remove, outname+"L";
      error, "input file "+inname+" already compressed";
    }
    crc = z_crc32(crc, body);
    data = z_flush(buf, body);
    if (!addr) data = data(3:0);
    data = data(1:-4);   /* strip adler32 checksum from zlib stream */
    _write, out, oaddr, data;
    oaddr += numberof(data);

  } else {
    /* tar and compress a directory tree
     * gztime, len, pos, body, first, crc, buf, out, oaddr are shared with
     * gzip_recurse, gzip_header, and gzip_body as extern variables
     */
    gztime = gzip_time();  /* fake mtime */
    len = pos = 0;
    body = array(char, bufsize);
    tarfile = [];
    first = 1;  /* to remove first two bytes of compressed stream */
    gzip_recurse, file, topname, flist, dlist;
    gzip_header;  /* no arguments: write end of archive and final flush */
  }

  /* gzip trailer: crc32 and uncompressed length, both little-endian */
  tail = char([crc, crc>>8, crc>>16, crc>>24,
               len, len>>8, len>>16, len>>24] & 0xff);
  _write, out, oaddr, tail;
  close, out;
  remove, outname+"L";
}

func gzip_recurse(file, topname, flist, dlist)
/* Worker for gzip: append directory FILE (with trailing "/") to the tar
 * stream.  TOPNAME is the name written for the directory entry, FLIST and
 * DLIST are the file and subdirectory lists returned by lsdir.
 * Uses body and pos (the tar block buffer and its fill position) from gzip.
 */
{
  extern body, pos;

  /* first do the directory header */
  gzip_header, topname, 0;

  /* then do the files */
  for (i=1 ; i<=numberof(flist) ; i++) {
    fn = file+flist(i);
    tarfile = open(fn, "rb", 1);
    flen = sizeof(tarfile);
    if (is_void(tarfile) || flen<=0) {
      write, "WARNING - skipping zero length or unopenable file "+fn;
      if (!is_void(tarfile)) close, tarfile;
      continue;
    }
    gzip_header, fn, flen;
    fbody = [];
    for (addr=0 ; addr<flen ; addr+=nf) {
      /* copy as much of the file as fits in the remainder of body */
      nf = min(flen-addr, numberof(body)-pos);
      if (numberof(fbody) != nf) fbody = array(char, nf);
      _read, tarfile, addr, fbody;
      body(pos+1:pos+nf) = fbody;
      /* advance to the next 512 byte block boundary (body is pre-zeroed),
       * and flush as soon as the buffer is full so pos stays in bounds
       */
      pos += (nf&~511) + ((nf&511)? 512 : 0);
      if (pos >= numberof(body)) gzip_body, 0;
    }
    close, tarfile;
  }

  /* then recurse down the directories */
  local ff, dd;
  if (numberof(dlist)) dlist += "/";
  for (i=1 ; i<=numberof(dlist) ; i++) {
    fn = file+dlist(i);
    ff = lsdir(fn, dd);
    gzip_recurse, fn, topname+dlist(i), ff, dd;
  }
}

func gzip_header(name, isize)
/* Worker for gzip: write one 512 byte tar header block into body at pos.
 * NAME is the archived path, ISIZE the file length (0 for a directory).
 * Called with no arguments, it instead appends one zero block to mark the
 * end of the archive and performs the final flush.
 * Uses body, pos, and gztime from gzip.
 */
{
  extern body, pos, gztime;

  if (!is_void(name)) {
    cname = *pointer(name);   /* includes terminating NUL */
    if (numberof(cname) > 101) {
      /* long name: last 100 characters in the name field, the leading
       * characters in the ustar prefix field
       */
      if (numberof(cname) > 256) error, "tar file name too long: "+name;
      body(pos+1:pos+100) = cname(-100:-1);
      body(pos+346:pos+345+numberof(cname)-101) = cname(1:-101);
    } else {
      body(pos+1:pos+numberof(cname)-1) = cname(1:-1);
    }
    if (!isize) {
      mode = 0040755;
    } else {
      mode = 0100644;
      /* name is a full path, so also match a final component "configure" */
      if (anyof(strpart(name,-2:0)==[".so",".sl",".sh"]) ||
          anyof(strpart(name,-3:0)==[".dll",".exe"]) ||
          strpart(name,-5:0)==".dylib" ||
          name=="configure" || strpart(name,-9:0)=="/configure")
        mode |= 0111;
    }
    body(pos+101:pos+108) = *pointer(swrite(format="%07lo", mode));
    body(pos+109:pos+116) = *pointer(swrite(format="%07lo", 01001)); /* uid */
    body(pos+117:pos+124) = *pointer(swrite(format="%07lo", 01001)); /* gid */
    body(pos+125:pos+136) = *pointer(swrite(format="%011lo", isize));
    body(pos+137:pos+148) = *pointer(swrite(format="%011lo", gztime));
    body(pos+149:pos+156) = ' ';   /* checksum field is blanks when summed */
    body(pos+157) = isize? '0' : '5';   /* regular file or directory */
    body(pos+258:pos+262) = ['u','s','t','a','r'];
    body(pos+266:pos+271) =
      body(pos+298:pos+303) = ['y','o','r','i','c','k'];  /* uname, gname */
    cksum = sum(long(body(pos+1:pos+512)));
    body(pos+149:pos+155) = *pointer(swrite(format="%06lo", cksum));
    pos += 512;
    if (pos >= numberof(body)) gzip_body, 0;

  } else {
    /* end of archive: one trailing zero block, then final flush */
    pos += 512;
    body = body(1:pos);
    gzip_body, 1;
    body = [];
  }
}

func gzip_body(final)
/* Worker for gzip: compress the whole of body, append the result to out,
 * then zero body and reset pos.  FINAL non-zero finishes the deflate
 * stream.  Updates crc, len, oaddr, pos, and first in gzip.
 */
{
  extern crc, len, buf, body, out, oaddr, pos, first;

  crc = z_crc32(crc, body);
  len += numberof(body);
  if (!final) {
    if (z_deflate(buf, body) > 0) {
      data = z_flush(buf);
      if (first) data = data(3:0);   /* strip two byte zlib header */
      _write, out, oaddr, data;
      oaddr += numberof(data);
    }
  } else {
    data = z_flush(buf, body);
    data = data(1:-4);   /* strip adler32 checksum from zlib stream */
    if (first) data = data(3:0);
    _write, out, oaddr, data;
    oaddr += numberof(data);
  }
  body() = '\0';
  pos = first = 0;
}

func gzip_mktime(y, m, d, h, n, s)
/* Return seconds since 1970 Jan 1 00:00:00 for year Y, month M (1-12),
 * day D, hour H, minute N, second S, using integer arithmetic only.
 */
{
  /* zero for 2000 Jan 0 0000 UT (1999 Dec 31 0000 UT) */
  day = 367*y - 7*(y+(m+9)/12)/4 + 275*m/9 + d - 730530;
  day += 10956;  /* zero for UNIX 1970 Jan 1 */
  return ((day*24 + h)*60 + n)*60 + s;
}

func gzip_time(void)
/* Return the current time, as reported by timestamp(), converted to
 * seconds since 1970 Jan 1 with no time zone correction applied.
 * Used as the fake modification time in tar headers.
 */
{
  m = "";
  h = n = s = d = y = 0;
  /* skip the leading day-of-week field of timestamp() */
  sread, format="%s %ld %ld:%ld:%ld %ld", strpart(timestamp(), 5:0),
    m, d, h, n, s, y;
  m = where(m==["Jan","Feb","Mar","Apr","May","Jun",
                "Jul","Aug","Sep","Oct","Nov","Dec"])(1);
  return gzip_mktime(y, m, d, h, n, s);
}

func gunzip(file, outname, bufsize=, notar=, tardir=)
/* DOCUMENT gunzip, gzfile
         or gunzip, gzfile, outname

     Uncompresses file GZFILE, which is in gzip format.  If GZFILE is
     a compressed tar archive file (usually with a .tgz or .tar.gz
     extension), gunzip will also unpack the archive, unless you
     supply a notar=1 keyword.

     If the name GZFILE ends in ".gz", or if the original filename is
     stored in the file, that will be the default name of the output
     file.  You can also supply an explicit OUTNAME argument.

     This is for light duty only; it does not correctly set file
     permissions, modification time, owner, or group.  It also skips
     soft or hard links or anything other than regular files and
     directories.  Only the first member of a multi-member gzip file
     is processed.

     The gunzip function accepts a bufsize= keyword (default 1 Mbyte),
     and a tardir=DIRNAME keyword.  The latter keyword untars the file
     in the specified directory, instead of in the current working
     directory, in the case that GZFILE is a compressed tar archive.

     The gunzip function will refuse to unpack a "tarbomb", that is,
     a tar archive in which the regular files are not inside any
     subdirectory.  You need to use the tardir= keyword in that case.

   SEE ALSO: gzip, untar
 */
{
  require, "zlib.i";

  /* normalize tardir to a null string or a name ending in "/" */
  if (is_void(tardir) || !strlen(tardir)) tardir = string(0);
  if (tardir && strpart(tardir, 0:0)=="\\") tardir = strpart(tardir, 1:-1);
  if (tardir && strpart(tardir, 0:0)!="/") tardir = tardir + "/";
  if (is_void(bufsize) || bufsize<=0) bufsize = 0x100000;
  else bufsize = (bufsize & ~511) + ((bufsize&511)? 512 : 0);

  if (!is_stream(file)) file = open(file, "rb");
  addr = 0;

  head = array(char, 10);
  addr0 = addr;
  _read, file, addr, head;
  addr += numberof(head);

  if (head(1)!='\037' || head(2)!='\213') error, "bad magic";
  cm = head(3);
  if (cm != 8) error, "only handle deflate compression method";
  ftext = head(4) & 1;     /* file probably text if set */
  fhcrc = head(4) & 2;
  fextra = head(4) & 4;
  fname = head(4) & 8;
  fcomment = head(4) & 16;
  /* mtime is UNIX seconds since 1/Jan/1970, 0 if unavailable */
  mtime = long(head(5:8));
  mtime = mtime(1) | (mtime(2)<<8) | (mtime(3)<<16) | (mtime(4)<<24);
  xfl = head(9);   /* 2 max compression, 4 fastest compression */
  os = head(10);
  /*  0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
      1 - Amiga
      2 - VMS (or OpenVMS)
      3 - Unix
      4 - VM/CMS
      5 - Atari TOS
      6 - HPFS filesystem (OS/2, NT)
      7 - Macintosh
      8 - Z-System
      9 - CP/M
     10 - TOPS-20
     11 - NTFS filesystem (NT)
     12 - QDOS
     13 - Acorn RISCOS
    255 - unknown
  */

  if (fextra) {
    xlen = array(char, 2);
    _read, file, addr, xlen;
    addr += numberof(xlen);
    xlen = long(xlen);
    xlen = xlen(1) | (xlen(2)<<8);
    extra = array(char, xlen);
    _read, file, addr, extra;
    addr += numberof(extra);
    /* extra is sequence of 2 byte id, 2 byte len, and len byte data */
  }

  if (fname) {
    /* NUL terminated original file name */
    oname = array(char, 4096);
    _read, file, addr, oname;
    list = where(!oname);
    if (!numberof(list)) error, "original filename >4096 characters";
    oname = oname(1:list(1));
    addr += numberof(oname);
    if (is_void(outname)) outname = string(&oname);
  } else if (is_void(outname)) {
    /* guess the output name from the input stream name */
    outname = strpart(strtok(print(file)(1), ":")(2), 2:0);
    if (strpart(outname, -2:0) == ".gz")
      outname = strpart(outname, 1:-3);
    else if (strpart(outname, -3:0) == ".tgz")
      outname = strpart(outname, 1:-4) + ".tar";
    else
      error, "unable to guess output filename for "+outname;
  }

  if (fcomment) {
    comment = array(char, 4096);
    _read, file, addr, comment;
    list = where(!comment);
    if (!numberof(list)) error, "file comment >4096 characters";
    comment = comment(1:list(1));
    addr += numberof(comment);
  }

  if (fhcrc) {
    hcrc = array(char, 2);
    _read, file, addr, hcrc;
    hcrc = long(hcrc);
    hcrc = hcrc(1) | (hcrc(2)<<8);
    /* crc16 covers every header byte preceding the crc16 itself */
    head = array(char, addr - addr0);
    _read, file, addr0, head;
    addr += 2;   /* the two crc16 bytes (hcrc is a scalar by now) */
    if (hcrc != (z_crc32(, head) & 0xffff))
      error, "crc16 gzip header checksum failed";
  }

  /* compressed data begins at addr */
  size = sizeof(file) - 8;  /* allow for tail */
  buf = z_inflate();

  /* two byte zlib header omitted at beginning of data */
  cinfo = 7;      /* window size = 2^(8+cinfo), no effect? */
  cmf = (long(cm) & 0xf) | (cinfo<<4);
  fcheck = 31 - ((cmf<<8) % 31);
  body0 = char([cmf, fcheck]);    /* always [0x78, 0x01] */
  /* back up two bytes so the synthetic zlib header can overwrite them */
  addr -= 2;

  /* out, oaddr, tarbuf, tarname, tarflags, tarpos, and tardir are shared
   * with tar_header and tar_write as extern variables
   * notar: -1 undecided, 0 unpacking tar archive, >0 plain output file
   */
  body = crc = adler = out = [];
  len = oaddr = tarpos = 0;
  tarbuf = tarname = tarflags = [];
  notar = notar? 1 : -1;
  for (flag=1 ; flag>0 ;) {
    if (addr >= size) {
      /* error, "unexpected end of gzip file";  (warning below) */
      break;
    }
    bufsize = min(size-addr, bufsize);
    if (numberof(body) != bufsize) body = array(char, bufsize);
    _read, file, addr, body;
    addr += numberof(body);
    if (!is_void(body0)) {
      body(1:2) = body0;
      body0 = [];
    }
    flag = z_inflate(buf, body);
    if (flag == 1) continue;
    if (flag == 3)
      error, "dictionary required to decompress this gzip stream";
    if (flag < -1) break;
    data = z_flush(buf);
    crc = z_crc32(crc, data);
    len += numberof(data);
    adler = z_crc32(adler, data, 1);
    if (addr >= size) {
      /* supply the adler32 trailer the zlib stream expects */
      flag = z_inflate(buf, char([adler>>24,adler>>16,adler>>8,adler]&0xff));
      if (flag) write, "WARNING - z_inflate final flag = "+print(flag)(1);
    }
    if (notar < 0) {
      /* accumulate until one full block is available to test for tar */
      grow, tarbuf, data;
      if (len >= 512) {
        notar = tar_header();
        if (!notar) data = [];
        else if (notar > 0) tarbuf = [];
      }
    }
    if (notar > 0) {
      if (is_void(out)) out = open(outname, "wb");
      _write, out, oaddr, data;
      oaddr += numberof(data);
    } else if (!notar) {
      tar_write, data;
    }
    data = [];
  }
  if (notar<0 && len>0) {
    /* fewer than 512 bytes in all, cannot be a tar archive */
    out = open(outname, "wb");
    _write, out, oaddr, tarbuf;
  }
  if (!is_void(out)) {
    close, out;
    remove, (notar? outname : tarname)+"L";
  }
  if (flag < -1) error, "gzip data stream corrupted";

  /* after compressed data comes crc32 for uncompressed data
   * and original size of uncompressed data (modulo 2^32)
   * no way to know where this is for sure...
   */
  if (!flag || addr>=size) {
    tail = array(char, 8);
    _read, file, addr, tail;
    addr += numberof(tail);
    tail = long(tail);
    crc32 = tail(1) | (tail(2)<<8) | (tail(3)<<16) | (tail(4)<<24);
    isize = tail(5) | (tail(6)<<8) | (tail(7)<<16) | (tail(8)<<24);
    if (crc != crc32) error, "crc32 gzip file checksum failed";
    if (isize != len) error, "uncompressed gzip data has wrong length";
  }

  /* if addr < sizeof(file), there is another "member" with same
   * format, beginning with head
   * but impossible to tell where it begins with z_inflate interface
   */
}

func tar_header(void)
/* Worker for gunzip: decode the 512 byte tar header at tarpos in tarbuf,
 * advancing tarpos past it.  Returns
 *   0  valid header; tarname and tarflags=[isize,mode,ftype] are set
 *   1  checksum mismatch (not a tar header)
 *   2  all-zero block (end of tar archive)
 */
{
  extern tarbuf, tarname, tarflags, tarpos;
  header = tarbuf(tarpos+1:tarpos+512);
  tarpos += 512;
  if (noneof(header(1:512))) return 2;      /* end of tar */
  tarname = string(&header(1:100));         /* path name, with slashes */
  mode = untar_number(header(101:108));  /* 4000 suid 2000 sgid 1000 svtx */
  isize = untar_number(header(125:136));    /* zero for link or dir */
  cksum = untar_number(header(149:156));  /* six octal digits \0 space */
  ftype = header(157);
  if (!ftype) ftype = '0';
  ustar = string(&header(258:265));
  if (ustar=="ustar" || ustar=="ustar  ") {  /* POSIX or GNU */
    /* prefix null if tarname less than or equal 100 chars */
    tarname = string(&header(346:500)) + tarname;
  }
  if (strpart(tarname,1:1) == "/") tarname = strpart(tarname, 2:0);
  if (tardir) tarname = tardir + tarname;
  /* checksum is computed with its own field filled with blanks */
  header(149:156) = ' ';
  if (cksum != sum(long(header))) return 1;
  tarflags = [isize, mode, ftype];
  return 0;
}

func tar_write(data)
/* Worker for gunzip: consume DATA, the next piece of the uncompressed tar
 * stream, writing member files and creating directories as their headers
 * and contents become available.  State persists between calls in out,
 * oaddr, tarbuf, tarname, tarflags, and tarpos.
 */
{
  extern out, oaddr, tarbuf, tarname, tarflags, tarpos;

  for (;;) {
    /* check if old (partial header) data exhausted, switch to new */
    while (tarpos >= numberof(tarbuf)) {
      /* tarpos is number of bytes of tarbuf already used */
      tarpos -= numberof(tarbuf);
      tarbuf = [];
      tarbuf = data;
      if (!numberof(tarbuf)) return;
      data = [];
    }

    /* if there is a current output file, continue writing it */
    if (!is_void(out)) {
      isize = tarflags(1);
      n = min(isize-oaddr, numberof(tarbuf)-tarpos);
      /* n is zero for a zero length member: nothing to write */
      if (n > 0) _write, out, oaddr, tarbuf(tarpos+1:tarpos+n);
      oaddr += n;
      tarpos += n;
      if (oaddr == isize) {
        /* file finished, close it */
        close, out;
        remove, tarname+"L";
        if (tarflags(2)&0100) chmod, "+x", tarname;
        tarname = tarflags = out = [];
        /* round isize up to next 512 byte block boundary
         * to discard any garbage bytes at end of a block
         */
        isize = (isize & ~511) + ((isize&511)? 512 : 0);
        tarpos += isize-oaddr;
        oaddr = 0;
      }
      if (tarpos >= numberof(tarbuf)) continue;
    }
    oaddr = 0;

    /* get the next tar header block */
    if (is_void(tarname)) {
      if (!is_void(tarflags)) return;  /* beyond end of tar stream */
      if (tarpos+512 >= numberof(tarbuf)) {
        /* header may straddle buffers: keep the unused part, append data */
        tarbuf = tarbuf(tarpos+1:0);
        tarpos = 0;
        if (!numberof(data)) return;
        grow, tarbuf, data;
        data = [];
        if (numberof(tarbuf) < 512) return;
      }
      flag = tar_header();
      if (flag == 1) error, "tar header checksum failure";
      if (flag == 2) {
        tarflags = [-1,0,0];  /* signal end of tar stream */
        return;
      }
    }

    cname = *pointer(tarname);
    list = where(cname == '/');
    ftype = tarflags(3);
    if (ftype == '0') {
      /* open regular file */
      if (!numberof(list))
        error, "refusing to unpack tarbomb, use tardir= keyword";
      if (list(0) == strlen(tarname))
        error, "found regular filename ending in /: "+tarname;
      out = open(tarname, "wb", 1);
      if (is_void(out)) {
        /* parent directory probably missing, create it and retry */
        mkdirp, cname, list;
        out = open(tarname, "wb", 1);
        if (is_void(out)) error, "unable to create file "+tarname;
      }

    } else if (ftype == '5') {
      /* create directory */
      if (!numberof(list) || list(0)!=strlen(tarname)) {
        /* no trailing slash: turn the terminating NUL into one */
        cname(0) = '/';
        grow, list, [numberof(cname)];
      }
      mkdirp, cname, list;
      tarname = tarflags = [];

    } else {
      /* skip past anything else */
      isize = tarflags(1);
      isize = (isize & ~511) + ((isize&511)? 512 : 0);
      tarpos += isize;
      tarname = tarflags = [];
    }
  }
}

func untar(file, todir=, bufsize=)
/* DOCUMENT untar, tarfile

     Unpacks TARFILE, a tar archive file (or an already open binary
     stream).

     This is for light duty only; it does not correctly set file
     permissions, modification time, owner, or group.  It also skips
     soft or hard links or anything other than regular files and
     directories.

     The untar function accepts a bufsize= keyword (default 1 Mbyte),
     and a todir=DIRNAME keyword.  The latter keyword untars the file
     in the specified directory, instead of in the current working
     directory.

     The untar function will refuse to unpack a "tarbomb", that is,
     a tar archive in which the regular files are not inside any
     subdirectory.  You need to use the todir= keyword in that case.

   SEE ALSO: gunzip, gzip
 */
{
  if (!is_stream(file)) file = open(file, "rb");
  addr = 0;
  size = sizeof(file);
  header = array(char, 512);

  /* normalize todir to a null string or a name ending in "/" */
  if (is_void(todir) || !strlen(todir)) todir = string(0);
  if (todir && strpart(todir, 0:0)=="\\") todir = strpart(todir, 1:-1);
  if (todir && strpart(todir, 0:0)!="/") todir = todir + "/";
  if (is_void(bufsize) || bufsize<=0) bufsize = 0x100000;
  else bufsize = (bufsize & ~511) + ((bufsize&511)? 512 : 0);

  while (addr < size) {
    header() = '\0';
    _read, file, addr, header;
    addr += 512;
    if (noneof(header)) break;   /* all-zero block ends the archive */
    oname = string(&header(1:100));         /* path name, with slashes */
    mode = untar_number(header(101:108));  /* 4000 suid 2000 sgid 1000 svtx */
    /* mode 0040000 for directories, 0100000 for regular files?? */
    exe = mode & 0100;
    uid = untar_number(header(109:116));
    gid = untar_number(header(117:124));
    isize = untar_number(header(125:136));    /* zero for link or dir */
    mtime = untar_number(header(137:148));
    cksum = untar_number(header(149:156));  /* six octal digits \0 space */
    ftype = header(157);
    if (!ftype) ftype = '0';
    /*
      LF_OLDNORMAL '\0'       Normal disk file, Unix compatible (obsolete form)
      LF_NORMAL    '0'        Normal disk file
      LF_LINK      '1'        Link to previously dumped file
      LF_SYMLINK   '2'        Symbolic link
      LF_CHR       '3'        Character special file
      LF_BLK       '4'        Block special file
      LF_DIR       '5'        Directory (name should end with /,
                                         isize ignored, 0 ok)
      LF_FIFO      '6'        FIFO special file
      LF_CONTIG    '7'        Contiguous file
    */
    lname = string(&header(158:257));   /* link target name (unused) */
    ustar = string(&header(258:265));
    if (ustar=="ustar" || ustar=="ustar  ") {  /* POSIX or GNU */
      uname = string(&header(266:297));
      gname = string(&header(298:329));
      devmajor = untar_number(header(330:337));
      devminor = untar_number(header(338:345));
      /* prefix null if oname less than or equal 100 chars */
      oname = string(&header(346:500)) + oname;
    }
    /* checksum is computed with its own field filled with blanks */
    header(149:156) = ' ';
    if (cksum != sum(long(header))) error, "tar header checksum failure";

    /* file contents in 512 byte blocks, file ends with 512 zero bytes */
    if (strpart(oname,1:1) == "/") oname = strpart(oname, 2:0);
    if (todir) oname = todir + oname;
    cname = *pointer(oname);
    list = where(cname == '/');
    if (ftype == '0') {
      if (!numberof(list))
        error, "refusing to unpack tarbomb, use todir= keyword";
      if (list(0) == strlen(oname))
        error, "found regular filename ending in /: "+oname;
      ofile = open(oname, "wb", 1);
      if (is_void(ofile)) {
        /* parent directory probably missing, create it and retry */
        mkdirp, cname, list;
        ofile = open(oname, "wb", 1);
        if (is_void(ofile)) error, "unable to create file "+oname;
      }
      /* copy exactly isize bytes, at most bufsize at a time */
      data = [];
      for (oaddr=0 ; oaddr<isize ; oaddr+=n) {
        n = min(bufsize, isize-oaddr);
        if (numberof(data) != n) data = array(char, n);
        _read, file, addr+oaddr, data;
        _write, ofile, oaddr, data;
      }
      /* member data is padded to a whole number of 512 byte blocks */
      addr += (isize & ~511) + ((isize&511)? 512 : 0);
      close, ofile;
      remove, oname+"L";
      if (exe) chmod, "+x", oname;
    } else if (ftype == '5') {
      if (!numberof(list) || list(0)!=strlen(oname)) {
        /* no trailing slash: turn the terminating NUL into one */
        cname(0) = '/';
        grow, list, [numberof(cname)];
      }
      mkdirp, cname, list;
    } else {
      write," unknown", string(&ftype);
      addr += (isize & ~511) + ((isize&511)? 512 : 0);
    }
  }
}

func untar_number(bytes)
/* Decode an octal number from a tar header field BYTES, which may be a
 * char array or a string.  Returns 0 if no octal number can be read.
 */
{
  if (structof(bytes) == char) bytes = string(&bytes);
  n = 0;
  sread, bytes, format="%lo", n;
  return n;
}

func gzip_shquote(name)
/* Return NAME wrapped in single quotes for use as one word of a shell
 * command, with each embedded single quote rewritten as '\''.
 */
{
  c = *pointer(name);   /* includes terminating NUL */
  q = "'";
  for (i=1 ; i<numberof(c) ; i++) {
    if (c(i) == '\'') q += "'\\''";
    else q += string(&c(i:i));
  }
  return q + "'";
}

func chmod(mode, file)
/* Run the system chmod utility as "chmod MODE FILE".  The utility is looked
 * for in /bin and /usr/bin on the first call and the result cached in
 * _chmod; if it is not found, this function silently does nothing.
 */
{
  if (is_void(_chmod)) {
    if (open("/bin/chmod","rb",1)) _chmod = "/bin/chmod";
    else if (open("/usr/bin/chmod","rb",1)) _chmod = "/usr/bin/chmod";
    else _chmod = string(0);
  }
  /* FILE may come from an archive, so it must not reach the shell unquoted */
  if (_chmod) system, _chmod+" "+mode+" "+gzip_shquote(file);
}

func mkdirp(cname, list)
/* like mkdir, but create any missing parent directories
 * CNAME is the path as a string or as a char array, LIST the indices in
 * CNAME at which each directory to be created ends (normally the
 * positions of the slashes).  If LIST is omitted it is computed here.
 */
{
  if (structof(cname)==string) cname = *pointer(cname);
  if (is_void(list)) {
    list = where(cname == '/');
    if (!numberof(list)) list = [numberof(cname)];
    /* no trailing slash: final component is also a directory to create */
    else if (list(0)<numberof(cname)-1) grow, list, [numberof(cname)];
  }
  /* find the deepest directory which already exists */
  for (i=numberof(list) ; i>0 ; i--) {
    dir = string(&cname(1:list(i)));
    flag = lsdir(dir);
    if (is_void(flag) || structof(flag)==string) break;
  }
  /* create everything below it, shallowest first */
  while (i < numberof(list)) mkdir, string(&cname(1:list(++i)));
}