aboutsummaryrefslogblamecommitdiffstats
path: root/pkg/datamaps/reader.go
blob: 25e6c99e70c8fa54557b5431495b11899d5d23cb (plain) (tree)
1
2
3
4
5
6
7
8
9

                                                                       
 
                

        
                      
                      
                


                   
             
                       

                 
                                          

                                       
                                   

 
      

                                                
 

                                              
 

                                                                                    

 

                                        




                      

                                           



                        

 

                               

 


                                                                                              
 

                                             





                                                   
 


                    
                                                         
                                     
                                                        
                               
 
                                  
                                                         


                                                                  
 
                         

 
                                                         
                          

                                                         
 
                                          
 
                       
                                                                  
         
 
                                                           
 




                                       
 
                               
                                                                   
                 
 



                                                  
 
                                   


                                                              
                                  
         
 
                     

 






                                                             




                                                                             


















                                                                               
                                                   

                                                           
                                     
                                      
                       
                                                                    
         
 
                                  
 
                       


                                                                    
                                                                                                
                 

                                         
         
 
                    
 
 
                                                                        


                                                                         
                                                                           
 
                                    
 







                                                                                       

                                                                                                 









                                                                         
                                       
                 
 
                                                                                        
         
 
                       

 
                                                                               
                                            

                                                                                                 
                       

                                                                             
         
                            
                                                                                                                                                  

                               

                                     
                                                
                                         
 


                                                     





                                                         
                                                         


                 
                         
 
 
                                                    
                                                                 

                                                              
                                                    
                               
                                 
 


                              
 
                                     
                                                

                                           
                                 

                                    
 
                                                         
                                                  
                                            

                 
 
                    
 
 

                                                            


                                                                                                     
 
                                                                 
                                              
 


                               
 
                          
                                                                                
         
 

                          
/* datmamaps packages handles datamap files and populated spreadsheets.
 */

package datamaps

import (
	"database/sql"
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"path/filepath"
	"strings"

	// Required for the sqlite3 driver
	_ "github.com/mattn/go-sqlite3"

	"github.com/tealeg/xlsx/v3"
)

type (
	// sheetData is the data from the sheet.
	sheetData map[string]extractedCell

	// FileData is the data from the file.
	FileData map[string]sheetData

	// ExtractedData is the Extracted data from the file, filtered by a Datamap.
	ExtractedData map[string]map[string]xlsx.Cell
)

//datamapLine - a line from the datamap.
type datamapLine struct {
	Key     string
	Sheet   string
	Cellref string
}

//extractedCell is data pulled from a cell.
type extractedCell struct {
	Cell  *xlsx.Cell
	Col   string
	Row   int
	Value string
}

var (
	inner = make(sheetData)
)

// ExtractedDatamapFile is a slice of datamapLine structs, each of which encodes a single line
// in the datamap file/database table.
type ExtractedDatamapFile []datamapLine

//sheetInSlice is a helper which returns true
// if a string is in a slice of strings.
func sheetInSlice(list []string, key string) bool {
	for _, x := range list {
		if x == key {
			return true
		}
	}

	return false
}

//getSheetNames returns the number of Sheet field entries
// in a slice of datamapLine structs.
func getSheetNames(dmls ExtractedDatamapFile) []string {
	var sheetNames []string

	for _, dml := range dmls {
		if !sheetInSlice(sheetNames, dml.Sheet) {
			sheetNames = append(sheetNames, dml.Sheet)
		}
	}

	return sheetNames
}

// ReadDML returns a slice of datamapLine structs given a
// path to a datamap file.
func ReadDML(path string) (ExtractedDatamapFile, error) {
	var s ExtractedDatamapFile

	data, err := ioutil.ReadFile(path)

	if err != nil {
		return s, fmt.Errorf("Cannot find file: %s", path)
	}

	r := csv.NewReader(strings.NewReader(string(data)))

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}

		if err != nil {
			return s, errors.New("Cannot read line %s")
		}

		if record[0] == "cell_key" {
			// this must be the header
			continue
		}

		dml := datamapLine{
			Key:     strings.Trim(record[0], " "),
			Sheet:   strings.Trim(record[1], " "),
			Cellref: strings.Trim(record[2], " ")}
		s = append(s, dml)
	}

	return s, nil
}

// cellVisitor is used by datamaps.rowVisitor() and is called
// on every cell in the target xlsx file in order to extract
// the data.
func cellVisitor(c *xlsx.Cell) error {
	x, y := c.GetCoordinates()
	cellref := xlsx.GetCellIDStringFromCoords(x, y)

	// TODO: we need to store the c.NumFmt value here in the
	// database so we can reply it again when we write the values
	// to the master or elsewhere. We should keep the value itself
	// in its unformatted state - i.e a date being something like 488594.

	ex := extractedCell{
		Cell:  c,
		Value: c.Value,
	}

	inner[cellref] = ex

	return nil
}

// rowVisitor is used as a callback by xlsx.sheet.ForEachRow(). It wraps
// a call to xlsx.Row.ForEachCell() which actually extracts the data.
func rowVisitor(r *xlsx.Row) error {
	if err := r.ForEachCell(cellVisitor, xlsx.SkipEmptyCells); err != nil {
		return err
	}
	return nil
}

// ReadXLSX returns a file at path's data as a map,
// keyed on sheet name. All values are returned as strings.
// Paths to a datamap and the spreadsheet file required.
func ReadXLSX(path string) FileData {
	wb, err := xlsx.OpenFile(path)
	if err != nil {
		fmt.Errorf("cannot open file at %s - %v", path, err)
	}

	outer := make(FileData, 1)

	// get the data
	for _, sheet := range wb.Sheets {

		if err := sheet.ForEachRow(rowVisitor); err != nil {
			fmt.Errorf("cannot call ForEachRow() in sheet %s - %v", sheet.Name, err)
		}
		outer[sheet.Name] = inner
		inner = make(sheetData)
	}

	return outer
}

// DatamapFromDB creates an ExtractedDatamapFile from the database given
// the name of a datamap. Of course, in this instance, the data is not
// coming from a datamap file (such as datamap.csv) but from datamap data
// previous stored in the database by DatamapToDB or similar.
func DatamapFromDB(name string, db *sql.DB) (ExtractedDatamapFile, error) {

	var out ExtractedDatamapFile

	query := `
	select
		key, sheet, cellref
	from datamap_line
		join datamap on datamap_line.dm_id = datamap.id where datamap.name = ?;
	`
	rows, err := db.Query(query, name)
	if err != nil {
		erstr := fmt.Sprintf("cannot query for datamap key, sheet and cellref - %v", err)
		return nil, errors.New(erstr)
	}
	defer rows.Close()

	for rows.Next() {
		var (
			key     string
			sheet   string
			cellref string
		)
		if err := rows.Scan(&key, &sheet, &cellref); err != nil {
			return nil, err
		}

		out = append(out, datamapLine{Key: key, Sheet: sheet, Cellref: cellref})
	}

	return out, nil
}

// ExtractDBDatamap uses a datamap named from the database db to extract values
// from the populated spreadsheet file file.
func ExtractDBDatamap(name string, file string, db *sql.DB) (ExtractedData, error) {
	ddata, err := DatamapFromDB(name, db) // this will need to return an ExtractedDatamapFile
	if err != nil {
		erstr := fmt.Sprintf("cannot call DatamapFromDB() - %v", err)
		return nil, errors.New(erstr)
	}
	if len(ddata) == 0 {
		return nil, fmt.Errorf("there is no datamap in the database matching name '%s'. Try running 'datamaps datamap --import...'", name)
	}
	xdata := ReadXLSX(file)

	names := getSheetNames(ddata)
	outer := make(ExtractedData, len(names))
	// var inner map[string]xlsx.Cell

	for _, s := range names {
		outer[s] = make(map[string]xlsx.Cell)
	}

	for _, i := range ddata {
		sheet := i.Sheet
		cellref := i.Cellref

		if val, ok := xdata[sheet][cellref]; ok {
			outer[sheet][cellref] = *val.Cell
		}
	}

	return outer, nil
}

// extract returns the file at path's data as a map,
// using the datamap as a filter, keyed on sheet name. All values
// are returned as strings. (Currently deprecated in favour of
// ExtractDBDatamap.
func extract(dm string, path string) ExtractedData {
	xdata := ReadXLSX(path)
	ddata, err := ReadDML(dm)

	if err != nil {
		log.Fatal(err)
	}

	names := getSheetNames(ddata)
	outer := make(ExtractedData, len(names))
	inner := make(map[string]xlsx.Cell)

	for _, i := range ddata {
		sheet := i.Sheet
		cellref := i.Cellref

		if val, ok := xdata[sheet][cellref]; ok {
			inner[cellref] = *val.Cell
			outer[sheet] = inner
		}
	}

	return outer
}

//getTargetFiles finds all xlsx and xlsm files in directory.
func getTargetFiles(path string) ([]string, error) {
	if lastchar := path[len(path)-1:]; lastchar != string(filepath.Separator) {
		return nil, fmt.Errorf("path must end in a %s character", string(filepath.Separator))
	}

	fullpath := strings.Join([]string{path, "*.xls[xm]"}, "")
	output, err := filepath.Glob(fullpath)

	if err != nil {
		return nil, err
	}

	if output == nil {
		return nil, fmt.Errorf("cannot find any xlsx files in %s", path)
	}

	return output, nil
}