aboutsummaryrefslogtreecommitdiffstats
path: root/datamaps/reader.go
diff options
context:
space:
mode:
authorMatthew Lemon <lemon@matthewlemon.com>2020-12-08 13:35:39 +0000
committerMatthew Lemon <lemon@matthewlemon.com>2020-12-08 13:35:39 +0000
commit4457cd61f10565c5fa1abafc5e763fb676c4293f (patch)
tree85903a7131eac2d6b7b10f0ef9d8fb2f123c01fe /datamaps/reader.go
parent4c6a364551196833aef9d4ad74ec29510f5068ae (diff)
fixed missing datamaps pkg
Diffstat (limited to 'datamaps/reader.go')
-rw-r--r--datamaps/reader.go293
1 files changed, 293 insertions, 0 deletions
diff --git a/datamaps/reader.go b/datamaps/reader.go
new file mode 100644
index 0000000..25e6c99
--- /dev/null
+++ b/datamaps/reader.go
@@ -0,0 +1,293 @@
+/* datmamaps packages handles datamap files and populated spreadsheets.
+ */
+
+package datamaps
+
+import (
+ "database/sql"
+ "encoding/csv"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "path/filepath"
+ "strings"
+
+ // Required for the sqlite3 driver
+ _ "github.com/mattn/go-sqlite3"
+
+ "github.com/tealeg/xlsx/v3"
+)
+
+type (
+ // sheetData is the data from the sheet.
+ sheetData map[string]extractedCell
+
+ // FileData is the data from the file.
+ FileData map[string]sheetData
+
+ // ExtractedData is the Extracted data from the file, filtered by a Datamap.
+ ExtractedData map[string]map[string]xlsx.Cell
+)
+
+//datamapLine - a line from the datamap.
+type datamapLine struct {
+ Key string
+ Sheet string
+ Cellref string
+}
+
+//extractedCell is data pulled from a cell.
+type extractedCell struct {
+ Cell *xlsx.Cell
+ Col string
+ Row int
+ Value string
+}
+
+var (
+ inner = make(sheetData)
+)
+
+// ExtractedDatamapFile is a slice of datamapLine structs, each of which encodes a single line
+// in the datamap file/database table.
+type ExtractedDatamapFile []datamapLine
+
+//sheetInSlice is a helper which returns true
+// if a string is in a slice of strings.
+func sheetInSlice(list []string, key string) bool {
+ for _, x := range list {
+ if x == key {
+ return true
+ }
+ }
+
+ return false
+}
+
+//getSheetNames returns the number of Sheet field entries
+// in a slice of datamapLine structs.
+func getSheetNames(dmls ExtractedDatamapFile) []string {
+ var sheetNames []string
+
+ for _, dml := range dmls {
+ if !sheetInSlice(sheetNames, dml.Sheet) {
+ sheetNames = append(sheetNames, dml.Sheet)
+ }
+ }
+
+ return sheetNames
+}
+
+// ReadDML returns a slice of datamapLine structs given a
+// path to a datamap file.
+func ReadDML(path string) (ExtractedDatamapFile, error) {
+ var s ExtractedDatamapFile
+
+ data, err := ioutil.ReadFile(path)
+
+ if err != nil {
+ return s, fmt.Errorf("Cannot find file: %s", path)
+ }
+
+ r := csv.NewReader(strings.NewReader(string(data)))
+
+ for {
+ record, err := r.Read()
+ if err == io.EOF {
+ break
+ }
+
+ if err != nil {
+ return s, errors.New("Cannot read line %s")
+ }
+
+ if record[0] == "cell_key" {
+ // this must be the header
+ continue
+ }
+
+ dml := datamapLine{
+ Key: strings.Trim(record[0], " "),
+ Sheet: strings.Trim(record[1], " "),
+ Cellref: strings.Trim(record[2], " ")}
+ s = append(s, dml)
+ }
+
+ return s, nil
+}
+
+// cellVisitor is used by datamaps.rowVisitor() and is called
+// on every cell in the target xlsx file in order to extract
+// the data.
+func cellVisitor(c *xlsx.Cell) error {
+ x, y := c.GetCoordinates()
+ cellref := xlsx.GetCellIDStringFromCoords(x, y)
+
+ // TODO: we need to store the c.NumFmt value here in the
+ // database so we can reply it again when we write the values
+ // to the master or elsewhere. We should keep the value itself
+ // in its unformatted state - i.e a date being something like 488594.
+
+ ex := extractedCell{
+ Cell: c,
+ Value: c.Value,
+ }
+
+ inner[cellref] = ex
+
+ return nil
+}
+
+// rowVisitor is used as a callback by xlsx.sheet.ForEachRow(). It wraps
+// a call to xlsx.Row.ForEachCell() which actually extracts the data.
+func rowVisitor(r *xlsx.Row) error {
+ if err := r.ForEachCell(cellVisitor, xlsx.SkipEmptyCells); err != nil {
+ return err
+ }
+ return nil
+}
+
+// ReadXLSX returns a file at path's data as a map,
+// keyed on sheet name. All values are returned as strings.
+// Paths to a datamap and the spreadsheet file required.
+func ReadXLSX(path string) FileData {
+ wb, err := xlsx.OpenFile(path)
+ if err != nil {
+ fmt.Errorf("cannot open file at %s - %v", path, err)
+ }
+
+ outer := make(FileData, 1)
+
+ // get the data
+ for _, sheet := range wb.Sheets {
+
+ if err := sheet.ForEachRow(rowVisitor); err != nil {
+ fmt.Errorf("cannot call ForEachRow() in sheet %s - %v", sheet.Name, err)
+ }
+ outer[sheet.Name] = inner
+ inner = make(sheetData)
+ }
+
+ return outer
+}
+
+// DatamapFromDB creates an ExtractedDatamapFile from the database given
+// the name of a datamap. Of course, in this instance, the data is not
+// coming from a datamap file (such as datamap.csv) but from datamap data
+// previous stored in the database by DatamapToDB or similar.
+func DatamapFromDB(name string, db *sql.DB) (ExtractedDatamapFile, error) {
+
+ var out ExtractedDatamapFile
+
+ query := `
+ select
+ key, sheet, cellref
+ from datamap_line
+ join datamap on datamap_line.dm_id = datamap.id where datamap.name = ?;
+ `
+ rows, err := db.Query(query, name)
+ if err != nil {
+ erstr := fmt.Sprintf("cannot query for datamap key, sheet and cellref - %v", err)
+ return nil, errors.New(erstr)
+ }
+ defer rows.Close()
+
+ for rows.Next() {
+ var (
+ key string
+ sheet string
+ cellref string
+ )
+ if err := rows.Scan(&key, &sheet, &cellref); err != nil {
+ return nil, err
+ }
+
+ out = append(out, datamapLine{Key: key, Sheet: sheet, Cellref: cellref})
+ }
+
+ return out, nil
+}
+
+// ExtractDBDatamap uses a datamap named from the database db to extract values
+// from the populated spreadsheet file file.
+func ExtractDBDatamap(name string, file string, db *sql.DB) (ExtractedData, error) {
+ ddata, err := DatamapFromDB(name, db) // this will need to return an ExtractedDatamapFile
+ if err != nil {
+ erstr := fmt.Sprintf("cannot call DatamapFromDB() - %v", err)
+ return nil, errors.New(erstr)
+ }
+ if len(ddata) == 0 {
+ return nil, fmt.Errorf("there is no datamap in the database matching name '%s'. Try running 'datamaps datamap --import...'", name)
+ }
+ xdata := ReadXLSX(file)
+
+ names := getSheetNames(ddata)
+ outer := make(ExtractedData, len(names))
+ // var inner map[string]xlsx.Cell
+
+ for _, s := range names {
+ outer[s] = make(map[string]xlsx.Cell)
+ }
+
+ for _, i := range ddata {
+ sheet := i.Sheet
+ cellref := i.Cellref
+
+ if val, ok := xdata[sheet][cellref]; ok {
+ outer[sheet][cellref] = *val.Cell
+ }
+ }
+
+ return outer, nil
+}
+
+// extract returns the file at path's data as a map,
+// using the datamap as a filter, keyed on sheet name. All values
+// are returned as strings. (Currently deprecated in favour of
+// ExtractDBDatamap.
+func extract(dm string, path string) ExtractedData {
+ xdata := ReadXLSX(path)
+ ddata, err := ReadDML(dm)
+
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ names := getSheetNames(ddata)
+ outer := make(ExtractedData, len(names))
+ inner := make(map[string]xlsx.Cell)
+
+ for _, i := range ddata {
+ sheet := i.Sheet
+ cellref := i.Cellref
+
+ if val, ok := xdata[sheet][cellref]; ok {
+ inner[cellref] = *val.Cell
+ outer[sheet] = inner
+ }
+ }
+
+ return outer
+}
+
+//getTargetFiles finds all xlsx and xlsm files in directory.
+func getTargetFiles(path string) ([]string, error) {
+ if lastchar := path[len(path)-1:]; lastchar != string(filepath.Separator) {
+ return nil, fmt.Errorf("path must end in a %s character", string(filepath.Separator))
+ }
+
+ fullpath := strings.Join([]string{path, "*.xls[xm]"}, "")
+ output, err := filepath.Glob(fullpath)
+
+ if err != nil {
+ return nil, err
+ }
+
+ if output == nil {
+ return nil, fmt.Errorf("cannot find any xlsx files in %s", path)
+ }
+
+ return output, nil
+}