aboutsummaryrefslogblamecommitdiffstats
path: root/reader/reader.go
blob: 71870c51be829ae1dd4b6763f876483cc20b5dd9 (plain) (tree)
1
2
3
4
5
6
7
8
9



                      
                


                   
             




                                



                                         
 




                                          

                                                     

 
                                  
 






                                        
                                           
                           



                          

 

                                             








                                                   

                                                         
                                                 





                                                                  
                         

 
                                                 
                                                  


                                          
                                                                  







                                                           
                                                                   




                                                  



                                                              
                                  
         
                     

 
                                                     
                          






                                                      
                                                                  
                                                




                                   
                                            

                                                                  

                 
                  

 
                                            

                                                           
                                                  
 





                                                               
                         
                                               
                       
                              
         

                                   
                              
         
                                            

                                                 

                                                
                                       

                                                              
                                                    




                                                                                   
                                                  
                         
                                                 

                 
                     
 


























                                                                                 
package reader

import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"strings"

	"github.com/tealeg/xlsx"
)

const (
	maxCols      = 16384
	maxAlphabets = (maxCols / 26) - 1
)

type (
	// Data from the sheet.
	SheetData map[string]ExtractedCell
	// Data from the file.
	FileData map[string]SheetData
	// Data from the file, filtered by a Datamap.
	ExtractedData map[string]map[string]xlsx.Cell
)

var colstream = cols(maxAlphabets)

//DatamapLine - a line from the datamap.
type DatamapLine struct {
	Key     string
	Sheet   string
	Cellref string
}

//ExtractedCell is data pulled from a cell.
type ExtractedCell struct {
	Cell    *xlsx.Cell
	ColL    string
	RowLidx int
	Value   string
}

//sheetInSlice is a helper which returns true
// if a string is in a slice of strings.
func sheetInSlice(list []string, key string) bool {
	for _, x := range list {
		if x == key {
			return true
		}
	}
	return false
}

//getSheetNames returns the number of Sheet field entries
// in a slice of DatamapLine structs.
func getSheetNames(dmls []DatamapLine) []string {
	var sheetNames []string
	for _, dml := range dmls {
		if sheetInSlice(sheetNames, dml.Sheet) == false {
			sheetNames = append(sheetNames, dml.Sheet)
		}
	}
	return sheetNames
}

//ReadDML returns a slice of DatamapLine structs.
func ReadDML(path string) ([]DatamapLine, error) {
	var s []DatamapLine
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return s, fmt.Errorf("Cannot find file: %s", path)
	}
	r := csv.NewReader(strings.NewReader(string(data)))
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return s, errors.New("Cannot read line %s")
		}
		if record[0] == "cell_key" {
			// this must be the header
			continue
		}
		dml := DatamapLine{
			Key:     strings.Trim(record[0], " "),
			Sheet:   strings.Trim(record[1], " "),
			Cellref: strings.Trim(record[2], " ")}
		s = append(s, dml)
	}
	return s, nil
}

//alphabet generates all the letters of the alphabet.
func alphabet() []string {
	letters := make([]string, 26)
	for idx := range letters {
		letters[idx] = string('A' + byte(idx))
	}
	return letters
}

//cols generates the alpha column compont of Excel cell references
//Adds n alphabets to the first (A..Z) alphabet.
func cols(n int) []string {
	out := alphabet()
	alen := len(out)
	tmp := make([]string, alen)
	copy(tmp, out)
	for cycle := 0; cycle < n; cycle++ {
		for y := 0; y < alen; y++ {
			out = append(out, out[(cycle+2)-2]+tmp[y])
		}
	}
	return out
}

//ReadXLSX returns the file's data as a map,
// keyed on sheet name. All values are returned as strings.
// Paths to a datamap and the spreadsheet file required.
func ReadXLSX(dm string, ssheet string) FileData {

	// TODO - to implement filtering by Datamap,
	// pull the data first, then go through each
	// item in the dmlData slice to check for sheet/cellref
	// matches, and then return them. Duplicate this func
	// to do so

	// open the files
	excelData, err := xlsx.OpenFile(ssheet)
	if err != nil {
		log.Fatal(err)
	}
	dmlData, err := ReadDML(dm)
	if err != nil {
		log.Fatal(err)
	}
	sheetNames := getSheetNames(dmlData)
	output := make(FileData, len(sheetNames))

	// get the data
	for _, sheet := range excelData.Sheets {
		data := make(SheetData)
		for rowLidx, row := range sheet.Rows {
			for colLidx, cell := range row.Cells {
				ex := ExtractedCell{
					Cell:    cell,
					ColL:    colstream[colLidx],
					RowLidx: rowLidx + 1,
					Value:   cell.Value}
				cellref := fmt.Sprintf("%s%d", ex.ColL, ex.RowLidx)
				data[cellref] = ex
			}
			output[sheet.Name] = data
		}
	}
	return output
}

//Extract returns the file's data as a map,
// keyed on sheet name. All values are returned as strings.
// Paths to a datamap and the spreadsheet file required.
func Extract(dm string, ssheet string) ExtractedData {
	data := ReadXLSX(dm, ssheet)
	dmlData, err := ReadDML(dm)
	if err != nil {
		log.Fatal(err)
	}
	sheetNames := getSheetNames(dmlData)
	output := make(ExtractedData, len(sheetNames))

	for _, i := range dmlData {
		sheet := i.Sheet
		cellref := i.Cellref
		if val, ok := data[sheet][cellref]; ok {
			// TODO check what is happening here...
			// ddg "golang assingment to entry in nil map"
			// first SO entry, but I don't think I totally understand
			inner := make(map[string]xlsx.Cell)
			inner[cellref] = *val.Cell
			output[sheet] = inner
		}
	}
	return output
}