aboutsummaryrefslogblamecommitdiffstats
path: root/reader/reader.go
blob: d99f2a8716b5886a0b5c711767928e8c960fa960 (plain) (tree)
1
2
3
4
5
6
7
8
9


              
                      
                      
                


                   
             
                       

                 

                                       
                                
                                  

 
      
                                               
                                          
                                              
                                     
                                                                                   
                                                     

 






                                        
                                           
                           



                        

 















                                                                                         

                                             








                                                   

                                                         
                                                 





                                                                  
                         

 
                                                 
                                                  


                                          
                                                                  







                                                           
                                                                   




                                                  



                                                              
                                  
         
                     

 
                                            

                                                           
                                       
 
                         
                                          
                       
                              
         
                                  
 
                       
                                           
                                        

                                                              



                                                                              
                                                    
                                                    
                                                      


                                                                              
                                                   
                         
                                                 

                 
                    
 

                                           

                                                                 

                                                        

                                 


                              

                                                

                                           
                                 

                                    
                                                         
                                                  
                                            

                 
                    
 


                                                            


                                                                                                     





                                                              
                                                                                


                          
package reader

import (
	"database/sql"
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"path/filepath"
	"strings"

	_ "github.com/mattn/go-sqlite3"

	"github.com/tealeg/xlsx"
	"github.com/yulqen/coords"
)

type (
	// SheetData is the data from the sheet
	SheetData map[string]ExtractedCell
	// FileData is the data from the file.
	FileData map[string]SheetData
	// ExtractedData is the Extraced data from the file, filtered by a Datamap.
	ExtractedData map[string]map[string]xlsx.Cell
)

//DatamapLine - a line from the datamap.
type DatamapLine struct {
	Key     string
	Sheet   string
	Cellref string
}

//ExtractedCell is data pulled from a cell.
type ExtractedCell struct {
	Cell  *xlsx.Cell
	Col   string
	Row   int
	Value string
}

func SetupDB(path string) (*sql.DB, error) {
	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return db, errors.New("Cannot open that damn database file")
	}
	stmt := `drop table if exists datamap;
			 create table datamap(id integer no null primary key, name text);
			`
	_, err = db.Exec(stmt)
	if err != nil {
		log.Printf("%q: %s\n", err, stmt)
	}

	return db, nil
}

//sheetInSlice is a helper which returns true
// if a string is in a slice of strings.
func sheetInSlice(list []string, key string) bool {
	for _, x := range list {
		if x == key {
			return true
		}
	}
	return false
}

//getSheetNames returns the number of Sheet field entries
// in a slice of DatamapLine structs.
func getSheetNames(dmls []DatamapLine) []string {
	var sheetNames []string
	for _, dml := range dmls {
		if sheetInSlice(sheetNames, dml.Sheet) == false {
			sheetNames = append(sheetNames, dml.Sheet)
		}
	}
	return sheetNames
}

//ReadDML returns a slice of DatamapLine structs.
func ReadDML(path string) ([]DatamapLine, error) {
	var s []DatamapLine
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return s, fmt.Errorf("Cannot find file: %s", path)
	}
	r := csv.NewReader(strings.NewReader(string(data)))
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return s, errors.New("Cannot read line %s")
		}
		if record[0] == "cell_key" {
			// this must be the header
			continue
		}
		dml := DatamapLine{
			Key:     strings.Trim(record[0], " "),
			Sheet:   strings.Trim(record[1], " "),
			Cellref: strings.Trim(record[2], " ")}
		s = append(s, dml)
	}
	return s, nil
}

//ReadXLSX returns the file's data as a map,
// keyed on sheet name. All values are returned as strings.
// Paths to a datamap and the spreadsheet file required.
func ReadXLSX(ssheet string) FileData {

	// open the files
	data, err := xlsx.OpenFile(ssheet)
	if err != nil {
		log.Fatal(err)
	}
	outer := make(FileData, 1)

	// get the data
	for _, sheet := range data.Sheets {
		inner := make(SheetData)
		for rowLidx, row := range sheet.Rows {
			for colLidx, cell := range row.Cells {
				colStr, err := coords.ColIndexToAlpha(colLidx)
				if err != nil {
					log.Fatal(err)
				}
				ex := ExtractedCell{
					Cell:  cell,
					Col:   colStr,
					Row:   rowLidx + 1,
					Value: cell.Value}
				cellref := fmt.Sprintf("%s%d", ex.Col, ex.Row)
				inner[cellref] = ex
			}
			outer[sheet.Name] = inner
		}
	}
	return outer
}

//Extract returns the file's data as a map,
// using the datamap as a filter, keyed on sheet name. All values
// are returned as strings.
// Paths to a datamap and the spreadsheet file required.
func Extract(dm string, ssheet string) ExtractedData {
	xdata := ReadXLSX(ssheet)
	ddata, err := ReadDML(dm)
	if err != nil {
		log.Fatal(err)
	}
	names := getSheetNames(ddata)
	outer := make(ExtractedData, len(names))
	inner := make(map[string]xlsx.Cell)

	for _, i := range ddata {
		sheet := i.Sheet
		cellref := i.Cellref
		if val, ok := xdata[sheet][cellref]; ok {
			inner[cellref] = *val.Cell
			outer[sheet] = inner
		}
	}
	return outer
}

//GetTargetFiles finds all xlsx and xlsm files in directory.
func GetTargetFiles(path string) ([]string, error) {
	if lastchar := path[len(path)-1:]; lastchar != string(filepath.Separator) {
		return nil, fmt.Errorf("path must end in a %s character", string(filepath.Separator))
	}
	fullpath := strings.Join([]string{path, "*.xlsx"}, "")
	output, err := filepath.Glob(fullpath)
	if err != nil {
		return nil, err
	}
	if output == nil {
		return nil, fmt.Errorf("cannot find any xlsx files in %s", path)
	}
	return output, nil
}