diff options
author | Matthew Lemon <lemon@matthewlemon.com> | 2020-07-19 15:04:41 +0100 |
---|---|---|
committer | Matthew Lemon <lemon@matthewlemon.com> | 2020-07-19 15:04:41 +0100 |
commit | bd587a070753c3f95ff921c83939bdec3a1284b9 (patch) | |
tree | 391ba3c7ec71b648212a7c2e5ada77d79a39b161 /pkg/reader/reader.go | |
parent | 1db96177350b8d785be2728f3d44907c8998d7b0 (diff) |
restructure
Diffstat (limited to 'pkg/reader/reader.go')
-rw-r--r-- | pkg/reader/reader.go | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/pkg/reader/reader.go b/pkg/reader/reader.go new file mode 100644 index 0000000..bdca4dc --- /dev/null +++ b/pkg/reader/reader.go @@ -0,0 +1,169 @@ +package reader + +import ( + "encoding/csv" + "errors" + "fmt" + "io" + "io/ioutil" + "log" + "path/filepath" + "strings" + + _ "github.com/mattn/go-sqlite3" + + "github.com/tealeg/xlsx" + "github.com/yulqen/coords" +) + +type ( + // SheetData is the data from the sheet + SheetData map[string]ExtractedCell + // FileData is the data from the file. + FileData map[string]SheetData + // ExtractedData is the Extraced data from the file, filtered by a Datamap. + ExtractedData map[string]map[string]xlsx.Cell +) + +//DatamapLine - a line from the datamap. +type DatamapLine struct { + Key string + Sheet string + Cellref string +} + +//ExtractedCell is data pulled from a cell. +type ExtractedCell struct { + Cell *xlsx.Cell + Col string + Row int + Value string +} + +//sheetInSlice is a helper which returns true +// if a string is in a slice of strings. +func sheetInSlice(list []string, key string) bool { + for _, x := range list { + if x == key { + return true + } + } + return false +} + +//getSheetNames returns the number of Sheet field entries +// in a slice of DatamapLine structs. +func getSheetNames(dmls []DatamapLine) []string { + var sheetNames []string + for _, dml := range dmls { + if sheetInSlice(sheetNames, dml.Sheet) == false { + sheetNames = append(sheetNames, dml.Sheet) + } + } + return sheetNames +} + +//ReadDML returns a slice of DatamapLine structs. +func ReadDML(path string) ([]DatamapLine, error) { + var s []DatamapLine + data, err := ioutil.ReadFile(path) + if err != nil { + return s, fmt.Errorf("Cannot find file: %s", path) + } + r := csv.NewReader(strings.NewReader(string(data))) + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return s, errors.New("Cannot read line %s") + } + if record[0] == "cell_key" { + // this must be the header + continue + } + dml := DatamapLine{ + Key: strings.Trim(record[0], " "), + Sheet: strings.Trim(record[1], " "), + Cellref: strings.Trim(record[2], " ")} + s = append(s, dml) + } + return s, nil +} + +//ReadXLSX returns the file's data as a map, +// keyed on sheet name. All values are returned as strings. +// Paths to a datamap and the spreadsheet file required. +func ReadXLSX(ssheet string) FileData { + + // open the files + data, err := xlsx.OpenFile(ssheet) + if err != nil { + log.Fatal(err) + } + outer := make(FileData, 1) + + // get the data + for _, sheet := range data.Sheets { + inner := make(SheetData) + for rowLidx, row := range sheet.Rows { + for colLidx, cell := range row.Cells { + colStr, err := coords.ColIndexToAlpha(colLidx) + if err != nil { + log.Fatal(err) + } + ex := ExtractedCell{ + Cell: cell, + Col: colStr, + Row: rowLidx + 1, + Value: cell.Value} + cellref := fmt.Sprintf("%s%d", ex.Col, ex.Row) + inner[cellref] = ex + } + outer[sheet.Name] = inner + } + } + return outer +} + +//Extract returns the file's data as a map, +// using the datamap as a filter, keyed on sheet name. All values +// are returned as strings. +// Paths to a datamap and the spreadsheet file required. +func Extract(dm string, ssheet string) ExtractedData { + xdata := ReadXLSX(ssheet) + ddata, err := ReadDML(dm) + if err != nil { + log.Fatal(err) + } + names := getSheetNames(ddata) + outer := make(ExtractedData, len(names)) + inner := make(map[string]xlsx.Cell) + + for _, i := range ddata { + sheet := i.Sheet + cellref := i.Cellref + if val, ok := xdata[sheet][cellref]; ok { + inner[cellref] = *val.Cell + outer[sheet] = inner + } + } + return outer +} + +//GetTargetFiles finds all xlsx and xlsm files in directory. +func GetTargetFiles(path string) ([]string, error) { + if lastchar := path[len(path)-1:]; lastchar != string(filepath.Separator) { + return nil, fmt.Errorf("path must end in a %s character", string(filepath.Separator)) + } + fullpath := strings.Join([]string{path, "*.xlsx"}, "") + output, err := filepath.Glob(fullpath) + if err != nil { + return nil, err + } + if output == nil { + return nil, fmt.Errorf("cannot find any xlsx files in %s", path) + } + return output, nil +} |