aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/datamaps/reader.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/datamaps/reader.go')
-rw-r--r--pkg/datamaps/reader.go293
1 files changed, 0 insertions, 293 deletions
diff --git a/pkg/datamaps/reader.go b/pkg/datamaps/reader.go
deleted file mode 100644
index 25e6c99..0000000
--- a/pkg/datamaps/reader.go
+++ /dev/null
@@ -1,293 +0,0 @@
-/* datmamaps packages handles datamap files and populated spreadsheets.
- */
-
-package datamaps
-
-import (
- "database/sql"
- "encoding/csv"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "path/filepath"
- "strings"
-
- // Required for the sqlite3 driver
- _ "github.com/mattn/go-sqlite3"
-
- "github.com/tealeg/xlsx/v3"
-)
-
-type (
- // sheetData is the data from the sheet.
- sheetData map[string]extractedCell
-
- // FileData is the data from the file.
- FileData map[string]sheetData
-
- // ExtractedData is the Extracted data from the file, filtered by a Datamap.
- ExtractedData map[string]map[string]xlsx.Cell
-)
-
-//datamapLine - a line from the datamap.
-type datamapLine struct {
- Key string
- Sheet string
- Cellref string
-}
-
-//extractedCell is data pulled from a cell.
-type extractedCell struct {
- Cell *xlsx.Cell
- Col string
- Row int
- Value string
-}
-
-var (
- inner = make(sheetData)
-)
-
-// ExtractedDatamapFile is a slice of datamapLine structs, each of which encodes a single line
-// in the datamap file/database table.
-type ExtractedDatamapFile []datamapLine
-
-//sheetInSlice is a helper which returns true
-// if a string is in a slice of strings.
-func sheetInSlice(list []string, key string) bool {
- for _, x := range list {
- if x == key {
- return true
- }
- }
-
- return false
-}
-
-//getSheetNames returns the number of Sheet field entries
-// in a slice of datamapLine structs.
-func getSheetNames(dmls ExtractedDatamapFile) []string {
- var sheetNames []string
-
- for _, dml := range dmls {
- if !sheetInSlice(sheetNames, dml.Sheet) {
- sheetNames = append(sheetNames, dml.Sheet)
- }
- }
-
- return sheetNames
-}
-
-// ReadDML returns a slice of datamapLine structs given a
-// path to a datamap file.
-func ReadDML(path string) (ExtractedDatamapFile, error) {
- var s ExtractedDatamapFile
-
- data, err := ioutil.ReadFile(path)
-
- if err != nil {
- return s, fmt.Errorf("Cannot find file: %s", path)
- }
-
- r := csv.NewReader(strings.NewReader(string(data)))
-
- for {
- record, err := r.Read()
- if err == io.EOF {
- break
- }
-
- if err != nil {
- return s, errors.New("Cannot read line %s")
- }
-
- if record[0] == "cell_key" {
- // this must be the header
- continue
- }
-
- dml := datamapLine{
- Key: strings.Trim(record[0], " "),
- Sheet: strings.Trim(record[1], " "),
- Cellref: strings.Trim(record[2], " ")}
- s = append(s, dml)
- }
-
- return s, nil
-}
-
-// cellVisitor is used by datamaps.rowVisitor() and is called
-// on every cell in the target xlsx file in order to extract
-// the data.
-func cellVisitor(c *xlsx.Cell) error {
- x, y := c.GetCoordinates()
- cellref := xlsx.GetCellIDStringFromCoords(x, y)
-
- // TODO: we need to store the c.NumFmt value here in the
- // database so we can reply it again when we write the values
- // to the master or elsewhere. We should keep the value itself
- // in its unformatted state - i.e a date being something like 488594.
-
- ex := extractedCell{
- Cell: c,
- Value: c.Value,
- }
-
- inner[cellref] = ex
-
- return nil
-}
-
-// rowVisitor is used as a callback by xlsx.sheet.ForEachRow(). It wraps
-// a call to xlsx.Row.ForEachCell() which actually extracts the data.
-func rowVisitor(r *xlsx.Row) error {
- if err := r.ForEachCell(cellVisitor, xlsx.SkipEmptyCells); err != nil {
- return err
- }
- return nil
-}
-
-// ReadXLSX returns a file at path's data as a map,
-// keyed on sheet name. All values are returned as strings.
-// Paths to a datamap and the spreadsheet file required.
-func ReadXLSX(path string) FileData {
- wb, err := xlsx.OpenFile(path)
- if err != nil {
- fmt.Errorf("cannot open file at %s - %v", path, err)
- }
-
- outer := make(FileData, 1)
-
- // get the data
- for _, sheet := range wb.Sheets {
-
- if err := sheet.ForEachRow(rowVisitor); err != nil {
- fmt.Errorf("cannot call ForEachRow() in sheet %s - %v", sheet.Name, err)
- }
- outer[sheet.Name] = inner
- inner = make(sheetData)
- }
-
- return outer
-}
-
-// DatamapFromDB creates an ExtractedDatamapFile from the database given
-// the name of a datamap. Of course, in this instance, the data is not
-// coming from a datamap file (such as datamap.csv) but from datamap data
-// previous stored in the database by DatamapToDB or similar.
-func DatamapFromDB(name string, db *sql.DB) (ExtractedDatamapFile, error) {
-
- var out ExtractedDatamapFile
-
- query := `
- select
- key, sheet, cellref
- from datamap_line
- join datamap on datamap_line.dm_id = datamap.id where datamap.name = ?;
- `
- rows, err := db.Query(query, name)
- if err != nil {
- erstr := fmt.Sprintf("cannot query for datamap key, sheet and cellref - %v", err)
- return nil, errors.New(erstr)
- }
- defer rows.Close()
-
- for rows.Next() {
- var (
- key string
- sheet string
- cellref string
- )
- if err := rows.Scan(&key, &sheet, &cellref); err != nil {
- return nil, err
- }
-
- out = append(out, datamapLine{Key: key, Sheet: sheet, Cellref: cellref})
- }
-
- return out, nil
-}
-
-// ExtractDBDatamap uses a datamap named from the database db to extract values
-// from the populated spreadsheet file file.
-func ExtractDBDatamap(name string, file string, db *sql.DB) (ExtractedData, error) {
- ddata, err := DatamapFromDB(name, db) // this will need to return an ExtractedDatamapFile
- if err != nil {
- erstr := fmt.Sprintf("cannot call DatamapFromDB() - %v", err)
- return nil, errors.New(erstr)
- }
- if len(ddata) == 0 {
- return nil, fmt.Errorf("there is no datamap in the database matching name '%s'. Try running 'datamaps datamap --import...'", name)
- }
- xdata := ReadXLSX(file)
-
- names := getSheetNames(ddata)
- outer := make(ExtractedData, len(names))
- // var inner map[string]xlsx.Cell
-
- for _, s := range names {
- outer[s] = make(map[string]xlsx.Cell)
- }
-
- for _, i := range ddata {
- sheet := i.Sheet
- cellref := i.Cellref
-
- if val, ok := xdata[sheet][cellref]; ok {
- outer[sheet][cellref] = *val.Cell
- }
- }
-
- return outer, nil
-}
-
-// extract returns the file at path's data as a map,
-// using the datamap as a filter, keyed on sheet name. All values
-// are returned as strings. (Currently deprecated in favour of
-// ExtractDBDatamap.
-func extract(dm string, path string) ExtractedData {
- xdata := ReadXLSX(path)
- ddata, err := ReadDML(dm)
-
- if err != nil {
- log.Fatal(err)
- }
-
- names := getSheetNames(ddata)
- outer := make(ExtractedData, len(names))
- inner := make(map[string]xlsx.Cell)
-
- for _, i := range ddata {
- sheet := i.Sheet
- cellref := i.Cellref
-
- if val, ok := xdata[sheet][cellref]; ok {
- inner[cellref] = *val.Cell
- outer[sheet] = inner
- }
- }
-
- return outer
-}
-
-//getTargetFiles finds all xlsx and xlsm files in directory.
-func getTargetFiles(path string) ([]string, error) {
- if lastchar := path[len(path)-1:]; lastchar != string(filepath.Separator) {
- return nil, fmt.Errorf("path must end in a %s character", string(filepath.Separator))
- }
-
- fullpath := strings.Join([]string{path, "*.xls[xm]"}, "")
- output, err := filepath.Glob(fullpath)
-
- if err != nil {
- return nil, err
- }
-
- if output == nil {
- return nil, fmt.Errorf("cannot find any xlsx files in %s", path)
- }
-
- return output, nil
-}