aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/datamaps/reader.go
diff options
context:
space:
mode:
authorMatthew Lemon <lemon@matthewlemon.com>2020-07-22 21:01:54 +0100
committerMatthew Lemon <lemon@matthewlemon.com>2020-07-22 21:01:54 +0100
commitd7d4d0a0476f98d1b0b4c5c454b6d1048a82bdad (patch)
tree11aaba3bab132e3f7d13f73c61d8b14739794f50 /pkg/datamaps/reader.go
parentc3d4c6bb51a3d7b89e0fcbb69230db423577ae17 (diff)
more major surgery
Diffstat (limited to 'pkg/datamaps/reader.go')
-rw-r--r--pkg/datamaps/reader.go172
1 files changed, 172 insertions, 0 deletions
diff --git a/pkg/datamaps/reader.go b/pkg/datamaps/reader.go
new file mode 100644
index 0000000..b67ad3c
--- /dev/null
+++ b/pkg/datamaps/reader.go
@@ -0,0 +1,172 @@
+/*
+reader reads datamap files and populated spreadsheets
+*/
+package datamaps
+
+import (
+ "encoding/csv"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "path/filepath"
+ "strings"
+
+ _ "github.com/mattn/go-sqlite3"
+
+ "github.com/tealeg/xlsx"
+ "github.com/yulqen/coords"
+)
+
+type (
+ // SheetData is the data from the sheet
+ SheetData map[string]ExtractedCell
+ // FileData is the data from the file.
+ FileData map[string]SheetData
+ // ExtractedData is the Extraced data from the file, filtered by a Datamap.
+ ExtractedData map[string]map[string]xlsx.Cell
+)
+
// DatamapLine is a single row of a datamap file: it names a value and says
// which sheet and cell that value lives in.
type DatamapLine struct {
	// Key is the name given to the value (first column of the datamap).
	Key string
	// Sheet is the name of the sheet holding the value (second column).
	Sheet string
	// Cellref is the cell reference within Sheet (third column).
	Cellref string
}
+
+//ExtractedCell is data pulled from a cell.
+type ExtractedCell struct {
+ Cell *xlsx.Cell
+ Col string
+ Row int
+ Value string
+}
+
// sheetInSlice reports whether key is one of the elements of list.
// A nil or empty list never contains key.
func sheetInSlice(list []string, key string) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == key {
			return true
		}
	}
	return false
}
+
+//getSheetNames returns the number of Sheet field entries
+// in a slice of DatamapLine structs.
+func getSheetNames(dmls []DatamapLine) []string {
+ var sheetNames []string
+ for _, dml := range dmls {
+ if sheetInSlice(sheetNames, dml.Sheet) == false {
+ sheetNames = append(sheetNames, dml.Sheet)
+ }
+ }
+ return sheetNames
+}
+
+//ReadDML returns a slice of DatamapLine structs.
+func ReadDML(path string) ([]DatamapLine, error) {
+ var s []DatamapLine
+ data, err := ioutil.ReadFile(path)
+ if err != nil {
+ return s, fmt.Errorf("Cannot find file: %s", path)
+ }
+ r := csv.NewReader(strings.NewReader(string(data)))
+ for {
+ record, err := r.Read()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return s, errors.New("Cannot read line %s")
+ }
+ if record[0] == "cell_key" {
+ // this must be the header
+ continue
+ }
+ dml := DatamapLine{
+ Key: strings.Trim(record[0], " "),
+ Sheet: strings.Trim(record[1], " "),
+ Cellref: strings.Trim(record[2], " ")}
+ s = append(s, dml)
+ }
+ return s, nil
+}
+
+//ReadXLSX returns the file's data as a map,
+// keyed on sheet name. All values are returned as strings.
+// Paths to a datamap and the spreadsheet file required.
+func ReadXLSX(ssheet string) FileData {
+
+ // open the files
+ data, err := xlsx.OpenFile(ssheet)
+ if err != nil {
+ log.Fatal(err)
+ }
+ outer := make(FileData, 1)
+
+ // get the data
+ for _, sheet := range data.Sheets {
+ inner := make(SheetData)
+ for rowLidx, row := range sheet.Rows {
+ for colLidx, cell := range row.Cells {
+ colStr, err := coords.ColIndexToAlpha(colLidx)
+ if err != nil {
+ log.Fatal(err)
+ }
+ ex := ExtractedCell{
+ Cell: cell,
+ Col: colStr,
+ Row: rowLidx + 1,
+ Value: cell.Value}
+ cellref := fmt.Sprintf("%s%d", ex.Col, ex.Row)
+ inner[cellref] = ex
+ }
+ outer[sheet.Name] = inner
+ }
+ }
+ return outer
+}
+
+//Extract returns the file's data as a map,
+// using the datamap as a filter, keyed on sheet name. All values
+// are returned as strings.
+// Paths to a datamap and the spreadsheet file required.
+func Extract(dm string, ssheet string) ExtractedData {
+ xdata := ReadXLSX(ssheet)
+ ddata, err := ReadDML(dm)
+ if err != nil {
+ log.Fatal(err)
+ }
+ names := getSheetNames(ddata)
+ outer := make(ExtractedData, len(names))
+ inner := make(map[string]xlsx.Cell)
+
+ for _, i := range ddata {
+ sheet := i.Sheet
+ cellref := i.Cellref
+ if val, ok := xdata[sheet][cellref]; ok {
+ inner[cellref] = *val.Cell
+ outer[sheet] = inner
+ }
+ }
+ return outer
+}
+
// GetTargetFiles returns the paths of all files matching "*.xlsx" directly
// inside the directory path. Files with other extensions, such as xlsm, are
// not matched.
//
// path must end in the platform's path separator. An error is returned when
// it does not (this includes the empty string), when the glob pattern is
// malformed, or when no xlsx file is found.
func GetTargetFiles(path string) ([]string, error) {
	sep := string(filepath.Separator)
	// HasSuffix is safe on an empty path, unlike slicing its last byte.
	if !strings.HasSuffix(path, sep) {
		return nil, fmt.Errorf("path must end in a %s character", sep)
	}
	output, err := filepath.Glob(path + "*.xlsx")
	if err != nil {
		return nil, err
	}
	if len(output) == 0 {
		return nil, fmt.Errorf("cannot find any xlsx files in %s", path)
	}
	return output, nil
}