Fix the dependency issue (#231)

This commit is contained in:
Robbie Zhang
2018-06-21 12:09:42 -07:00
committed by GitHub
parent 027b76651d
commit 6ec1098bb8
16629 changed files with 74837 additions and 4975021 deletions

View File

@@ -1,44 +0,0 @@
asm
===
This library is for the assembly and disassembly of tar archives, facilitated by
`github.com/vbatts/tar-split/tar/storage`.
Concerns
--------
For completely safe assembly/disassembly, there will need to be a Content
Addressable Storage (CAS) directory, that maps to a checksum in the
`storage.Entity` of `storage.FileType`.
This is due to the fact that tar archives _can_ allow multiple records for the
same path, but the last one effectively wins, even if the prior records had a
different payload.
In this way, when assembling an archive from relative paths, if the archive has
multiple entries for the same path, then all payloads read in from a relative
path would be identical.
Thoughts
--------
Have a look-aside directory or storage. This way when a clobbering record is
encountered from the tar stream, then the payload of the prior/existing file is
stored to the CAS. This way the clobbering record's file payload can be
extracted, but we'll have preserved the payload needed to reassemble a precise
tar archive.
clobbered/path/to/file.[0-N]
*alternatively*
We could just _not_ support tar streams that have clobbering file paths.
Appending records to the archive is not incredibly common, and doesn't happen
by default for most implementations. Not supporting them wouldn't be a
security concern either, as if it did occur, we would reassemble an archive
that doesn't validate signature/checksum, so it shouldn't be trusted anyway.
Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE.

View File

@@ -1,256 +0,0 @@
package asm
import (
"bytes"
"compress/gzip"
"crypto/sha1"
"fmt"
"hash/crc64"
"io"
"io/ioutil"
"os"
"testing"
"github.com/vbatts/tar-split/tar/storage"
)
// entries are the well-formed fixtures used by TestTarStreamMangledGetterPutter.
// For every entry the test stores Body under the entry's name and expects Put
// to report Entry.Size as the number of bytes written and Entry.Payload as the
// checksum.
var entries = []struct {
Entry storage.Entry
Body []byte
}{
{
Entry: storage.Entry{
Type: storage.FileType,
Name: "./hurr.txt",
Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78},
Size: 20,
},
Body: []byte("imma hurr til I derp"),
},
// The body below contains a two-byte UTF-8 rune (é), so Size is a byte
// count (26), not a character count (25).
{
Entry: storage.Entry{
Type: storage.FileType,
Name: "./ermahgerd.txt",
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
},
Body: []byte("café con leche, por favor"),
},
// Same body and checksum as the previous entry, but named through NameRaw
// with bytes that are not valid UTF-8.
{
Entry: storage.Entry{
Type: storage.FileType,
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip.
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
},
Body: []byte("café con leche, por favor"),
},
}
// entriesMangled mirrors entries name-for-name, but the first byte of every
// Payload has been incremented so that it no longer matches the checksum of
// the body stored under that name. TestTarStreamMangledGetterPutter only reads
// the Entry field of these fixtures; Body is kept for reference.
var entriesMangled = []struct {
Entry storage.Entry
Body []byte
}{
{
Entry: storage.Entry{
Type: storage.FileType,
Name: "./hurr.txt",
Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78},
Size: 20,
},
// "hurr" and "derp" are swapped relative to entries
Body: []byte("imma derp til I hurr"),
},
{
Entry: storage.Entry{
Type: storage.FileType,
Name: "./ermahgerd.txt",
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
},
// "sans" instead of "con"
Body: []byte("café sans leche, por favor"),
},
// Body is unchanged here; only the Payload differs from entries.
{
Entry: storage.Entry{
Type: storage.FileType,
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4},
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
},
Body: []byte("café con leche, por favor"),
},
}
// TestTarStreamMangledGetterPutter stores every body from entries in a
// buffer-backed FileGetPutter, then reads each one back under the names listed
// in entriesMangled and verifies that the mangled checksums do NOT match the
// CRC64 of what was actually stored.
func TestTarStreamMangledGetterPutter(t *testing.T) {
	fgp := storage.NewBufferFileGetPutter()

	// first lets prep a GetPutter and Packer
	for i := range entries {
		if entries[i].Entry.Type == storage.FileType {
			j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body))
			if err != nil {
				t.Error(err)
			}
			if j != entries[i].Entry.Size {
				t.Errorf("size %q: expected %d; got %d",
					entries[i].Entry.GetName(),
					entries[i].Entry.Size,
					j)
			}
			if !bytes.Equal(csum, entries[i].Entry.Payload) {
				t.Errorf("checksum %q: expected %v; got %v",
					entries[i].Entry.GetName(),
					entries[i].Entry.Payload,
					csum)
			}
		}
	}

	for _, e := range entriesMangled {
		if e.Entry.Type == storage.FileType {
			rdr, err := fgp.Get(e.Entry.GetName())
			if err != nil {
				// Without a reader there is nothing to checksum; carrying on
				// would hand a possibly-nil reader to io.Copy.
				t.Fatal(err)
			}
			c := crc64.New(storage.CRCTable)
			i, err := io.Copy(c, rdr)
			// Close before inspecting the copy error so the reader is
			// released on the failure path as well.
			rdr.Close()
			if err != nil {
				t.Fatal(err)
			}
			csum := c.Sum(nil)
			if bytes.Equal(csum, e.Entry.Payload) {
				t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v",
					i,
					e.Entry.GetName(),
					csum)
			}
		}
	}
}
// testCases lists the gzip-compressed tar fixtures shared by TestTarStream and
// BenchmarkAsm. For each fixture, expectedSHA1Sum is the hex-encoded SHA-1 and
// expectedSize the byte length that TestTarStream expects of the decompressed
// tar stream.
var testCases = []struct {
	path            string
	expectedSHA1Sum string
	expectedSize    int64
}{
	{
		path:            "./testdata/t.tar.gz",
		expectedSHA1Sum: "1eb237ff69bca6e22789ecb05b45d35ca307adbd",
		expectedSize:    10240,
	},
	{
		path:            "./testdata/longlink.tar.gz",
		expectedSHA1Sum: "d9f6babe107b7247953dff6b5b5ae31a3a880add",
		expectedSize:    20480,
	},
	{
		path:            "./testdata/fatlonglink.tar.gz",
		expectedSHA1Sum: "8537f03f89aeef537382f8b0bb065d93e03b0be8",
		expectedSize:    26234880,
	},
	{
		path:            "./testdata/iso-8859.tar.gz",
		expectedSHA1Sum: "ddafa51cb03c74ec117ab366ee2240d13bba1ec3",
		expectedSize:    10240,
	},
	{
		path:            "./testdata/extranils.tar.gz",
		expectedSHA1Sum: "e187b4b3e739deaccc257342f4940f34403dc588",
		expectedSize:    10648,
	},
	{
		path:            "./testdata/notenoughnils.tar.gz",
		expectedSHA1Sum: "72f93f41efd95290baa5c174c234f5d4c22ce601",
		expectedSize:    512,
	},
}
// TestTarStream round-trips every fixture in testCases: the archive is
// disassembled through NewInputTarStream (checking size and SHA-1 of the
// pass-through stream), then reassembled with NewOutputTarStream from the
// packed metadata and buffered payloads, and the size and SHA-1 of the result
// are checked against the same expectations.
func TestTarStream(t *testing.T) {
	for _, tc := range testCases {
		// Each case runs in its own function so the deferred Close calls fire
		// at the end of the case rather than piling up until the whole test
		// returns.
		func() {
			fh, err := os.Open(tc.path)
			if err != nil {
				t.Fatal(err)
			}
			defer fh.Close()
			gzRdr, err := gzip.NewReader(fh)
			if err != nil {
				t.Fatal(err)
			}
			defer gzRdr.Close()

			// Setup where we'll store the metadata
			w := bytes.NewBuffer([]byte{})
			sp := storage.NewJSONPacker(w)
			fgp := storage.NewBufferFileGetPutter()

			// wrap the disassembly stream
			tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
			if err != nil {
				t.Fatal(err)
			}

			// get a sum of the stream after it has passed through to ensure it's the same.
			h0 := sha1.New()
			i, err := io.Copy(h0, tarStream)
			if err != nil {
				t.Fatal(err)
			}

			if i != tc.expectedSize {
				t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i)
			}
			if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum {
				t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil))
			}

			//t.Logf("%s", w.String()) // if we fail, then show the packed info

			// If we've made it this far, then we'll turn it around and create a tar
			// stream from the packed metadata and buffered file contents.
			r := bytes.NewBuffer(w.Bytes())
			sup := storage.NewJSONUnpacker(r)
			// and reuse the fgp that we Put the payloads to.

			rc := NewOutputTarStream(fgp, sup)
			h1 := sha1.New()
			i, err = io.Copy(h1, rc)
			if err != nil {
				t.Fatal(err)
			}

			if i != tc.expectedSize {
				t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i)
			}
			if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum {
				t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil))
			}
		}()
	}
}
func BenchmarkAsm(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, tc := range testCases {
func() {
fh, err := os.Open(tc.path)
if err != nil {
b.Fatal(err)
}
defer fh.Close()
gzRdr, err := gzip.NewReader(fh)
if err != nil {
b.Fatal(err)
}
defer gzRdr.Close()
// Setup where we'll store the metadata
w := bytes.NewBuffer([]byte{})
sp := storage.NewJSONPacker(w)
fgp := storage.NewBufferFileGetPutter()
// wrap the disassembly stream
tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
if err != nil {
b.Fatal(err)
}
// read it all to the bit bucket
i1, err := io.Copy(ioutil.Discard, tarStream)
if err != nil {
b.Fatal(err)
}
r := bytes.NewBuffer(w.Bytes())
sup := storage.NewJSONUnpacker(r)
// and reuse the fgp that we Put the payloads to.
rc := NewOutputTarStream(fgp, sup)
i2, err := io.Copy(ioutil.Discard, rc)
if err != nil {
b.Fatal(err)
}
if i1 != i2 {
b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2)
}
}()
}
}
}

View File

@@ -1,72 +0,0 @@
package asm
import (
"archive/tar"
"fmt"
"io"
"io/ioutil"
"os"
"testing"
"github.com/vbatts/tar-split/tar/storage"
)
// TestLargeJunkPadding feeds NewInputTarStream an empty tar archive followed
// by 20GiB of zero bytes and drains the result.
//
// This test failing causes the binary to crash due to memory overcommitment.
func TestLargeJunkPadding(t *testing.T) {
	pR, pW := io.Pipe()

	// Write a normal tar file into the pipe and then load it full of junk
	// bytes as padding. We have to do this in a goroutine because we can't
	// store 20GB of junk in-memory.
	//
	// Failures in here are reported with t.Error, never t.Fatal: the FailNow
	// family may only be called from the goroutine running the test function.
	// The error is recorded before the pipe is closed so that the test cannot
	// finish (and make t unusable) before the failure has been logged.
	go func() {
		// Empty archive.
		tw := tar.NewWriter(pW)
		if err := tw.Close(); err != nil {
			t.Error(err)
			pW.CloseWithError(err)
			return
		}

		// Write junk.
		const (
			junkChunkSize = 64 * 1024 * 1024
			junkChunkNum  = 20 * 16
		)
		devZero, err := os.Open("/dev/zero")
		if err != nil {
			t.Error(err)
			pW.CloseWithError(err)
			return
		}
		defer devZero.Close()
		for i := 0; i < junkChunkNum; i++ {
			if i%32 == 0 {
				fmt.Fprintf(os.Stderr, "[TestLargeJunkPadding] junk chunk #%d/#%d\n", i, junkChunkNum)
			}
			if _, err := io.CopyN(pW, devZero, junkChunkSize); err != nil {
				t.Error(err)
				pW.CloseWithError(err)
				return
			}
		}

		fmt.Fprintln(os.Stderr, "[TestLargeJunkPadding] junk chunk finished")
		pW.Close()
	}()

	// Disassemble our junk file.
	nilPacker := storage.NewJSONPacker(ioutil.Discard)
	rdr, err := NewInputTarStream(pR, nilPacker, nil)
	if err != nil {
		t.Fatal(err)
	}

	// Copy the entire rdr.
	_, err = io.Copy(ioutil.Discard, rdr)
	if err != nil {
		t.Fatal(err)
	}

	// At this point, if we haven't crashed then we are not vulnerable to
	// CVE-2017-14992.
}

Binary file not shown.

View File

@@ -1,95 +0,0 @@
package storage
import (
"encoding/json"
"sort"
"testing"
)
// TestEntries sorts an Entries value whose elements are listed out of
// Position order and checks that the first element afterwards has Position 0.
func TestEntries(t *testing.T) {
	shuffled := Entries{
		{Type: SegmentType, Payload: []byte("y'all"), Position: 1},
		{Type: SegmentType, Payload: []byte("doin"), Position: 3},
		{Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), Position: 2},
		{Type: SegmentType, Payload: []byte("how"), Position: 0},
	}

	sort.Sort(shuffled)

	if first := shuffled[0].Position; first != 0 {
		t.Errorf("expected Position 0, but got %d", first)
	}
}
// TestFile round-trips a file Entry whose name was set with SetName through
// encoding/json and checks that the name, Size and Position are preserved.
func TestFile(t *testing.T) {
	f := Entry{
		Type:     FileType,
		Size:     100,
		Position: 2,
	}
	f.SetName("./hello.txt")

	buf, err := json.Marshal(f)
	if err != nil {
		t.Fatal(err)
	}

	f1 := Entry{}
	if err = json.Unmarshal(buf, &f1); err != nil {
		t.Fatal(err)
	}

	if f.GetName() != f1.GetName() {
		t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
	}
	// Size and Position are integers: %q would print them as quoted
	// character literals (100 becomes 'd'), so use %d.
	if f.Size != f1.Size {
		t.Errorf("expected Size %d, got %d", f.Size, f1.Size)
	}
	if f.Position != f1.Position {
		t.Errorf("expected Position %d, got %d", f.Position, f1.Position)
	}
}
// TestFileRaw round-trips a file Entry whose name was set from raw bytes
// (including the non-UTF-8 byte 0xE4) through encoding/json and checks that
// the name, Size and Position are preserved.
func TestFileRaw(t *testing.T) {
	f := Entry{
		Type:     FileType,
		Size:     100,
		Position: 2,
	}
	// "./hello" + 0xE4 + ".txt"
	f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74})

	buf, err := json.Marshal(f)
	if err != nil {
		t.Fatal(err)
	}

	f1 := Entry{}
	if err = json.Unmarshal(buf, &f1); err != nil {
		t.Fatal(err)
	}

	if f.GetName() != f1.GetName() {
		t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
	}
	// Size and Position are integers: %q would print them as quoted
	// character literals (100 becomes 'd'), so use %d.
	if f.Size != f1.Size {
		t.Errorf("expected Size %d, got %d", f.Size, f1.Size)
	}
	if f.Position != f1.Position {
		t.Errorf("expected Position %d, got %d", f.Position, f1.Position)
	}
}

View File

@@ -1,84 +0,0 @@
package storage
import (
"bytes"
"fmt"
"io/ioutil"
"strings"
"testing"
)
// TestGetter stores a couple of small bodies in a buffer-backed
// FileGetPutter, checking the checksum reported by Put, and then reads each
// one back by name and compares it with what was stored.
func TestGetter(t *testing.T) {
	fgp := NewBufferFileGetPutter()
	// map[filename]map[body]expected checksum
	files := map[string]map[string][]byte{
		"file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}},
		"file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}},
	}
	for n, b := range files {
		for body, sum := range b {
			_, csum, err := fgp.Put(n, bytes.NewBufferString(body))
			if err != nil {
				t.Error(err)
			}
			if !bytes.Equal(csum, sum) {
				t.Errorf("checksum: expected 0x%x; got 0x%x", sum, csum)
			}
		}
	}
	for n, b := range files {
		for body := range b {
			r, err := fgp.Get(n)
			if err != nil {
				// There is no reader to consume after a failed Get; carrying
				// on would pass a possibly-nil reader to ReadAll.
				t.Fatal(err)
			}
			buf, err := ioutil.ReadAll(r)
			if err != nil {
				t.Error(err)
			}
			if body != string(buf) {
				t.Errorf("expected %q, got %q", body, string(buf))
			}
		}
	}
}
// TestPutter feeds a handful of small bodies to a discarding FilePutter and
// checks that the checksum returned by Put matches the expected value.
func TestPutter(t *testing.T) {
	putter := NewDiscardFilePutter()

	// map[filename]map[body]crc64sum
	fixtures := map[string]map[string][]byte{
		"file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}},
		"file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}},
		"file3.txt": {"baz": []byte{32, 68, 22, 240, 0, 0, 0, 0}},
		"file4.txt": {"bif": []byte{48, 9, 150, 240, 0, 0, 0, 0}},
	}

	for name, bodies := range fixtures {
		for content, want := range bodies {
			_, got, err := putter.Put(name, bytes.NewBufferString(content))
			if err != nil {
				t.Error(err)
			}
			if matched := bytes.Equal(got, want); !matched {
				t.Errorf("checksum on %q: expected %v; got %v", name, want, got)
			}
		}
	}
}
// BenchmarkPutter measures storing six repetitive payloads (each a short word
// repeated 1000 times) into a fresh buffer-backed FileGetPutter per iteration.
func BenchmarkPutter(b *testing.B) {
	payloads := []string{
		strings.Repeat("foo", 1000),
		strings.Repeat("bar", 1000),
		strings.Repeat("baz", 1000),
		strings.Repeat("fooz", 1000),
		strings.Repeat("vbatts", 1000),
		strings.Repeat("systemd", 1000),
	}

	for iter := 0; iter < b.N; iter++ {
		store := NewBufferFileGetPutter()
		for idx, payload := range payloads {
			// The slice index doubles as the file name.
			name := fmt.Sprintf("%d", idx)
			_, _, err := store.Put(name, bytes.NewBufferString(payload))
			if err != nil {
				b.Fatal(err)
			}
		}
	}
}

View File

@@ -1,218 +0,0 @@
package storage
import (
"bytes"
"compress/gzip"
"io"
"io/ioutil"
"os"
"testing"
)
// TestDuplicateFail checks that a JSON packer accepts the first file entry
// for a path and answers ErrDuplicatePath for later entries naming the same
// file, whether or not the path carries a leading "./".
func TestDuplicateFail(t *testing.T) {
	first := Entry{
		Type:    FileType,
		Name:    "./hurr.txt",
		Payload: []byte("abcde"),
	}
	duplicates := []Entry{
		{
			Type:    FileType,
			Name:    "./hurr.txt",
			Payload: []byte("deadbeef"),
		},
		{
			Type:    FileType,
			Name:    "hurr.txt", // slightly different path, same file though
			Payload: []byte("deadbeef"),
		},
	}

	packer := NewJSONPacker(bytes.NewBuffer([]byte{}))

	if _, err := packer.AddEntry(first); err != nil {
		t.Error(err)
	}
	for _, dup := range duplicates {
		_, err := packer.AddEntry(dup)
		if err != ErrDuplicatePath {
			t.Errorf("expected failure on duplicate path")
		}
	}
}
// TestJSONPackerUnpacker packs a small mix of segment and file entries into
// an in-memory buffer with the JSON packer, reads them back with the JSON
// unpacker, and checks that the same number of entries comes out.
func TestJSONPackerUnpacker(t *testing.T) {
	e := []Entry{
		Entry{
			Type:    SegmentType,
			Payload: []byte("how"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("y'all"),
		},
		Entry{
			Type:    FileType,
			Name:    "./hurr.txt",
			Payload: []byte("deadbeef"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("doin"),
		},
	}

	buf := []byte{}
	b := bytes.NewBuffer(buf)

	func() {
		jp := NewJSONPacker(b)
		for i := range e {
			if _, err := jp.AddEntry(e[i]); err != nil {
				t.Error(err)
			}
		}
	}()

	// >> packer_test.go:43: uncompressed: 266
	//t.Errorf("uncompressed: %d", len(b.Bytes()))

	b = bytes.NewBuffer(b.Bytes())
	entries := Entries{}
	func() {
		jup := NewJSONUnpacker(b)
		for {
			entry, err := jup.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				// Stop here: after a real error there is no entry to
				// dereference, and retrying a persistently failing Next
				// would never leave this loop.
				t.Fatal(err)
			}
			entries = append(entries, *entry)
			t.Logf("got %#v", entry)
		}
	}()
	if len(entries) != len(e) {
		t.Errorf("expected %d entries, got %d", len(e), len(entries))
	}
}
// TestGzip is the same round trip as TestJSONPackerUnpacker, but with the
// packer writing through a gzip.Writer and the unpacker reading through a
// gzip.Reader.
//
// You can use a compress Reader/Writer and make nice savings: for these two
// tests, which use the same set of entries, it is the difference between 266
// bytes uncompressed and 138 bytes compressed.
func TestGzip(t *testing.T) {
	e := []Entry{
		Entry{
			Type:    SegmentType,
			Payload: []byte("how"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("y'all"),
		},
		Entry{
			Type:    FileType,
			Name:    "./hurr.txt",
			Payload: []byte("deadbeef"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("doin"),
		},
	}

	buf := []byte{}
	b := bytes.NewBuffer(buf)
	gzW := gzip.NewWriter(b)
	jp := NewJSONPacker(gzW)
	for i := range e {
		if _, err := jp.AddEntry(e[i]); err != nil {
			t.Error(err)
		}
	}
	// Close flushes the compressed stream; if it fails the buffer does not
	// hold a complete gzip stream and reading it back is pointless.
	if err := gzW.Close(); err != nil {
		t.Fatal(err)
	}

	// >> packer_test.go:99: compressed: 138
	//t.Errorf("compressed: %d", len(b.Bytes()))

	b = bytes.NewBuffer(b.Bytes())
	gzR, err := gzip.NewReader(b)
	if err != nil {
		t.Fatal(err)
	}
	entries := Entries{}
	func() {
		jup := NewJSONUnpacker(gzR)
		for {
			entry, err := jup.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				// Stop here: after a real error there is no entry to
				// dereference, and retrying a persistently failing Next
				// would never leave this loop.
				t.Fatal(err)
			}
			entries = append(entries, *entry)
			t.Logf("got %#v", entry)
		}
	}()
	if len(entries) != len(e) {
		t.Errorf("expected %d entries, got %d", len(e), len(entries))
	}
}
// BenchmarkGetPut measures, in parallel, packing a small set of entries into
// a temporary file with the JSON packer and then reading them back from that
// same file with the JSON unpacker.
func BenchmarkGetPut(b *testing.B) {
	e := []Entry{
		Entry{
			Type:    SegmentType,
			Payload: []byte("how"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("y'all"),
		},
		Entry{
			Type:    FileType,
			Name:    "./hurr.txt",
			Payload: []byte("deadbeef"),
		},
		Entry{
			Type:    SegmentType,
			Payload: []byte("doin"),
		},
	}
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			// One closure per iteration so the temp file is closed and
			// removed before the next iteration starts.
			func() {
				fh, err := ioutil.TempFile("", "tar-split.")
				if err != nil {
					b.Fatal(err)
				}
				defer os.Remove(fh.Name())
				defer fh.Close()

				jp := NewJSONPacker(fh)
				for i := range e {
					if _, err := jp.AddEntry(e[i]); err != nil {
						b.Fatal(err)
					}
				}
				fh.Sync()

				// The writes above leave the file offset at end-of-file.
				// Rewind so the unpacker actually reads the entries back
				// instead of hitting EOF on its first call.
				if _, err := fh.Seek(0, io.SeekStart); err != nil {
					b.Fatal(err)
				}

				up := NewJSONUnpacker(fh)
				for {
					_, err := up.Next()
					if err != nil {
						if err == io.EOF {
							break
						}
						b.Fatal(err)
					}
				}
			}()
		}
	})
}