package reader import ( "encoding/csv" "errors" "fmt" "io" "io/ioutil" "log" "strings" "github.com/tealeg/xlsx" ) const ( maxCols = 16384 maxAlphabets = (maxCols / 26) - 1 ) type ( // Data from the sheet. SheetData map[string]ExtractedCell // Data from the file. FileData map[string]SheetData // Data from the file, filtered by a Datamap. ExtractedData map[string]map[string]xlsx.Cell ) var colstream = cols(maxAlphabets) //DatamapLine - a line from the datamap. type DatamapLine struct { Key string Sheet string Cellref string } //ExtractedCell is data pulled from a cell. type ExtractedCell struct { Cell *xlsx.Cell Col string Row int Value string } //sheetInSlice is a helper which returns true // if a string is in a slice of strings. func sheetInSlice(list []string, key string) bool { for _, x := range list { if x == key { return true } } return false } //getSheetNames returns the number of Sheet field entries // in a slice of DatamapLine structs. func getSheetNames(dmls []DatamapLine) []string { var sheetNames []string for _, dml := range dmls { if sheetInSlice(sheetNames, dml.Sheet) == false { sheetNames = append(sheetNames, dml.Sheet) } } return sheetNames } //ReadDML returns a slice of DatamapLine structs. func ReadDML(path string) ([]DatamapLine, error) { var s []DatamapLine data, err := ioutil.ReadFile(path) if err != nil { return s, fmt.Errorf("Cannot find file: %s", path) } r := csv.NewReader(strings.NewReader(string(data))) for { record, err := r.Read() if err == io.EOF { break } if err != nil { return s, errors.New("Cannot read line %s") } if record[0] == "cell_key" { // this must be the header continue } dml := DatamapLine{ Key: strings.Trim(record[0], " "), Sheet: strings.Trim(record[1], " "), Cellref: strings.Trim(record[2], " ")} s = append(s, dml) } return s, nil } //alphabet generates all the letters of the alphabet. func alphabet() []string { letters := make([]string, 26) for idx := range letters { letters[idx] = string('A' + byte(idx)) } return letters } //cols generates the alpha column compont of Excel cell references //Adds n alphabets to the first (A..Z) alphabet. func cols(n int) []string { out := alphabet() alen := len(out) tmp := make([]string, alen) copy(tmp, out) for cycle := 0; cycle < n; cycle++ { for y := 0; y < alen; y++ { out = append(out, out[(cycle+2)-2]+tmp[y]) } } return out } //ReadXLSX returns the file's data as a map, // keyed on sheet name. All values are returned as strings. // Paths to a datamap and the spreadsheet file required. func ReadXLSX(ssheet string) FileData { // open the files data, err := xlsx.OpenFile(ssheet) if err != nil { log.Fatal(err) } outer := make(FileData, 1) // get the data for _, sheet := range data.Sheets { inner := make(SheetData) for rowLidx, row := range sheet.Rows { for colLidx, cell := range row.Cells { ex := ExtractedCell{ Cell: cell, Col: colstream[colLidx], Row: rowLidx + 1, Value: cell.Value} cellref := fmt.Sprintf("%s%d", ex.Col, ex.Row) inner[cellref] = ex } outer[sheet.Name] = inner } } return outer } //Extract returns the file's data as a map, // using the datamap as a filter, keyed on sheet name. All values // are returned as strings. // Paths to a datamap and the spreadsheet file required. func Extract(dm string, ssheet string) ExtractedData { xdata := ReadXLSX(ssheet) ddata, err := ReadDML(dm) if err != nil { log.Fatal(err) } names := getSheetNames(ddata) outer := make(ExtractedData, len(names)) inner := make(map[string]xlsx.Cell) for _, i := range ddata { sheet := i.Sheet cellref := i.Cellref if val, ok := xdata[sheet][cellref]; ok { inner[cellref] = *val.Cell outer[sheet] = inner } } return outer }