aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Lemon <y@yulqen.org>2024-04-19 20:51:48 +0100
committerMatthew Lemon <y@yulqen.org>2024-04-19 20:51:48 +0100
commit04f78ef7a46ae4c4b8ab49153c16983c590783f5 (patch)
tree77825f91fb6175f300c2667be4c9a91b19aab417
parent85d77655a515c120e5c0ba3c48c6be4eda1ac8f6 (diff)
First parse of an Excel file
-rw-r--r--cmd/dbasik-api/datamaps.go22
-rw-r--r--cmd/dbasik-api/datamaps_test.go80
-rw-r--r--cmd/dbasik-api/handlers.go42
-rw-r--r--cmd/dbasik-api/helpers.go12
-rw-r--r--cmd/dbasik-api/return.go130
-rw-r--r--cmd/dbasik-api/return_test.go168
-rw-r--r--datamap.csv2
-rw-r--r--testdata/valid_excel.xlsxbin0 -> 5870 bytes
8 files changed, 387 insertions, 69 deletions
diff --git a/cmd/dbasik-api/datamaps.go b/cmd/dbasik-api/datamaps.go
index 1121581..65a8808 100644
--- a/cmd/dbasik-api/datamaps.go
+++ b/cmd/dbasik-api/datamaps.go
@@ -23,7 +23,7 @@ import (
"time"
)
-// A custom err to return from our Get() method when looking up a datamap
+// ErrRecordNotFound A custom err to return from our Get() method when looking up a Datamap
// that doesn't exist
var (
ErrRecordNotFound = errors.New("record not found")
@@ -36,35 +36,37 @@ type Models struct {
DatamapLines datamapLineModel
}
-// datamapLine holds the data parsed from each line of a submitted datamap CSV file.
+// DatamapLine holds the data parsed from each line of a submitted Datamap CSV file.
// The fields need to be exported otherwise they won't be included when encoding
// the struct to json.
-type datamapLine struct {
+type DatamapLine struct {
ID int64 `json:"id"`
Key string `json:"key"`
Sheet string `json:"sheet"`
DataType string `json:"datatype"`
- Cellref string `json:"cellref"`
+ CellRef string `json:"cellref"`
}
type datamapLineModel struct {
DB *sql.DB
}
-// datamap includes a slice of datamapLine objects alongside header metadata
-type datamap struct {
+// Datamap includes a slice of DatamapLine objects alongside header metadata
+type Datamap struct {
ID int64 `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
Created time.Time `json:"created"`
- DMLs []datamapLine `json:"datamap_lines"`
+ DMLs []DatamapLine `json:"datamap_lines"`
}
type datamapModel struct {
DB *sql.DB
}
-func GetSheetsFromDM(dm datamap) []string {
+// GetSheetsFromDM extracts a set of sheet names from a Datamap struct
+func GetSheetsFromDM(dm Datamap) []string {
+ // this is basically how sets are done in Go - see https://www.sohamkamani.com/golang/sets/
sheets := map[string]struct{}{}
for _, dml := range dm.DMLs {
sheets[dml.Sheet] = struct{}{}
@@ -83,7 +85,7 @@ func NewModels(db *sql.DB) Models {
}
}
-func (m *datamapLineModel) Insert(dm datamap, dmls []datamapLine) (int, error) {
+func (m *datamapLineModel) Insert(dm Datamap, dmls []DatamapLine) (int, error) {
var datamapID int64
tx, err := m.DB.Begin()
if err != nil {
@@ -111,7 +113,7 @@ func (m *datamapLineModel) Insert(dm datamap, dmls []datamapLine) (int, error) {
line.Key,
line.Sheet,
line.DataType,
- line.Cellref)
+ line.CellRef)
if err != nil {
tx.Rollback()
return 0, err
diff --git a/cmd/dbasik-api/datamaps_test.go b/cmd/dbasik-api/datamaps_test.go
index 428f301..827a054 100644
--- a/cmd/dbasik-api/datamaps_test.go
+++ b/cmd/dbasik-api/datamaps_test.go
@@ -6,45 +6,53 @@ import (
"time"
)
-func TestReadDML(t *testing.T) {
- dm := datamap{
- ID: 0,
- Name: "Test Name",
- Description: "Test description",
- Created: time.Now(),
- DMLs: []datamapLine{
- {
- ID: 1,
- Key: "Test Key",
- Sheet: "Test Sheet",
- DataType: "TEXT",
- Cellref: "A10",
- },
- {
- ID: 2,
- Key: "Test Key 2",
- Sheet: "Test Sheet",
- DataType: "TEXT",
- Cellref: "A11",
- },
- {
- ID: 3,
- Key: "Test Key 3",
- Sheet: "Test Sheet 2",
- DataType: "TEXT",
- Cellref: "A12",
+func TestGetSheetsFromDM(t *testing.T) {
+ testCases := []struct {
+ name string
+ datamap Datamap
+ expected []string
+ }{
+ {
+ name: "Extract unique sheet names",
+ datamap: Datamap{
+ ID: 0,
+ Name: "Test Name",
+ Description: "Test description",
+ Created: time.Now(),
+ DMLs: []DatamapLine{
+ {
+ ID: 1,
+ Key: "Test Key",
+ Sheet: "Test Sheet",
+ DataType: "TEXT",
+ CellRef: "A10",
+ },
+ {
+ ID: 2,
+ Key: "Test Key 2",
+ Sheet: "Test Sheet",
+ DataType: "TEXT",
+ CellRef: "A11",
+ },
+ {
+ ID: 3,
+ Key: "Test Key 3",
+ Sheet: "Test Sheet 2",
+ DataType: "TEXT",
+ CellRef: "A12",
+ },
+ },
},
+ expected: []string{"Test Sheet", "Test Sheet 2"},
},
}
- got := GetSheetsFromDM(dm)
- if !slices.Contains(got, "Test Sheet") {
- t.Errorf("expected to find Test Sheet in %v but didn't find it", got)
- }
- if !slices.Contains(got, "Test Sheet 2") {
- t.Errorf("expected to find Test Sheet in %v but didn't find it", got)
- }
- if slices.Contains(got, "Test Sheet 3") {
- t.Errorf("expected to find Test Sheet in %v but didn't find it", got)
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ got := GetSheetsFromDM(tc.datamap)
+ if !slices.Equal(got, tc.expected) {
+ t.Errorf("GetSheetsFromDM(%v) = %v, expected %v", tc.datamap, got, tc.expected)
+ }
+ })
}
}
diff --git a/cmd/dbasik-api/handlers.go b/cmd/dbasik-api/handlers.go
index c79de7e..b6b4443 100644
--- a/cmd/dbasik-api/handlers.go
+++ b/cmd/dbasik-api/handlers.go
@@ -53,7 +53,7 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque
// parse the csv
reader := csv.NewReader(file)
- var dmls []datamapLine
+ var dmls []DatamapLine
for {
line, err := reader.Read()
@@ -69,16 +69,16 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque
return
}
- dmls = append(dmls, datamapLine{
+ dmls = append(dmls, DatamapLine{
Key: line[0],
Sheet: line[1],
DataType: line[2],
- Cellref: line[3],
+ CellRef: line[3],
})
}
- dm := datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
+ dm := Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
- // Parse the XLSX file based on the datamap...
+ // Parse the XLSX file based on the Datamap...
// open an existing file
wb, err := xlsx.OpenFile(dst.Name())
if err != nil {
@@ -95,7 +95,7 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque
//Here is where we parse our files.
- err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil)
+ err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil)
if err != nil {
app.logger.Debug("writing out csv", "err", err)
app.serverErrorResponse(w, r, err)
@@ -128,8 +128,8 @@ func (app *application) createDatamapHandler(w http.ResponseWriter, r *http.Requ
// parse the csv
reader := csv.NewReader(file)
- var dmls []datamapLine
- var dm datamap
+ var dmls []DatamapLine
+ var dm Datamap
for {
line, err := reader.Read()
@@ -145,16 +145,16 @@ func (app *application) createDatamapHandler(w http.ResponseWriter, r *http.Requ
return
}
- dmls = append(dmls, datamapLine{
+ dmls = append(dmls, DatamapLine{
Key: line[0],
Sheet: line[1],
DataType: line[2],
- Cellref: line[3],
+ CellRef: line[3],
})
}
- dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
+ dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
- err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil)
+ err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil)
if err != nil {
app.logger.Debug("writing out csv", "err", err)
app.serverErrorResponse(w, r, err)
@@ -185,8 +185,8 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques
// parse the csv
reader := csv.NewReader(file)
- var dmls []datamapLine
- var dm datamap
+ var dmls []DatamapLine
+ var dm Datamap
for {
line, err := reader.Read()
@@ -202,16 +202,16 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques
return
}
- dmls = append(dmls, datamapLine{
+ dmls = append(dmls, DatamapLine{
ID: 0,
Key: line[0],
Sheet: line[1],
DataType: line[2],
- Cellref: line[3],
+ CellRef: line[3],
})
}
- dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
+ dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
// save to the database
_, err = app.models.DatamapLines.Insert(dm, dmls)
@@ -224,9 +224,9 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques
func (app *application) getJSONForDatamap(w http.ResponseWriter, r *http.Request) {
// Get the DM out of the database
- // dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
+ // dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls}
- // err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil)
+ // err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil)
// if err != nil {
// app.logger.Debug("writing out csv", "err", err)
// app.serverErrorResponse(w, r, err)
@@ -242,11 +242,11 @@ func (app *application) showDatamapHandler(w http.ResponseWriter, r *http.Reques
if err != nil || id_int < 1 {
app.notFoundResponse(w, r)
}
- fmt.Fprintf(w, "show the details for datamap %d\n", id_int)
+ fmt.Fprintf(w, "show the details for Datamap %d\n", id_int)
}
func (app *application) createDatamapLine(w http.ResponseWriter, r *http.Request) {
- var input datamapLine
+ var input DatamapLine
err := json.NewDecoder(r.Body).Decode(&input)
if err != nil {
app.errorResponse(w, r, http.StatusBadRequest, err.Error())
diff --git a/cmd/dbasik-api/helpers.go b/cmd/dbasik-api/helpers.go
index 3b07dc2..68d9f66 100644
--- a/cmd/dbasik-api/helpers.go
+++ b/cmd/dbasik-api/helpers.go
@@ -20,9 +20,19 @@ package main
import (
"encoding/json"
"net/http"
+ "regexp"
)
-// We want this so that our JSON is nested under a key at the top, e.g. "datamap:"...
+// validateSpreadsheetCell checks that the cellRef is in a valid format
+func validateSpreadsheetCell(cellRef string) bool {
+ pattern := `^[A-Z]+[1-9][0-9]*$`
+
+ regExp := regexp.MustCompile(pattern)
+
+ return regExp.MatchString(cellRef)
+}
+
+// We want this so that our JSON is nested under a key at the top, e.g. "Datamap:"...
type envelope map[string]interface{}
// writeJSON)Pretty() helper for sending responses - pretty prints output. This takes the destination http.ResponseWriter, the
diff --git a/cmd/dbasik-api/return.go b/cmd/dbasik-api/return.go
new file mode 100644
index 0000000..294c960
--- /dev/null
+++ b/cmd/dbasik-api/return.go
@@ -0,0 +1,130 @@
+package main
+
+import (
+ "fmt"
+ "github.com/tealeg/xlsx/v3"
+ "path/filepath"
+)
+
+type ReturnLine struct {
+ Sheet string
+ CellRef string
+ Value string
+}
+
+type Return struct {
+ Name string
+ ReturnLines []ReturnLine
+}
+
+// NewReturnLine creates a new ReturnLine object
+func NewReturnLine(sheet, cellRef, value string) (*ReturnLine, error) {
+ if err := validateInputs(sheet, cellRef, value); err != nil {
+ return nil, err
+ }
+
+ if !validateSpreadsheetCell(cellRef) {
+ return nil, fmt.Errorf("cellRef must be A1 format")
+ }
+
+ return &ReturnLine{
+ Sheet: sheet,
+ CellRef: cellRef,
+ Value: value,
+ }, nil
+}
+
+func validateInputs(sheet, cellRef, value string) error {
+ if sheet == "" {
+ return fmt.Errorf("sheet parameter is required")
+ }
+ if cellRef == "" {
+ return fmt.Errorf("cellRef parameter is required")
+ }
+ if value == "" {
+ return fmt.Errorf("value parameter is required")
+ }
+ return nil
+}
+
+func NewReturn(name string, dm *Datamap, returnLines []ReturnLine) (*Return, error) {
+ if len(returnLines) == 0 {
+ return nil, fmt.Errorf("ReturnLines must contain at least one ReturnLine")
+ }
+
+ return &Return{
+ Name: name,
+ ReturnLines: returnLines,
+ }, nil
+}
+
+//func cellVisitor(c *xlsx.Cell) error {
+// value, err := c.FormattedValue()
+// if err != nil {
+// fmt.Println(err.Error())
+// } else {
+// fmt.Printf("Sheet: %s, Cell value: %s\n", c.Row.Sheet.Name, value)
+// }
+// return err
+//}
+
+//func rowVisitor(r *xlsx.Row) error {
+// return r.ForEachCell(cellVisitor, xlsx.SkipEmptyCells)
+//}
+
+func ParseXLSX(filePath string, dm *Datamap) (*Return, error) {
+ // Use tealeg/xlsx to parse the Excel file
+ wb, err := xlsx.OpenFile(filePath)
+ if err != nil {
+ return nil, err
+ }
+
+ // Get the set of sheets from the Datamap
+ sheets := GetSheetsFromDM(*dm)
+
+ // Loop through all DatamapLines
+ returnLines := []ReturnLine{}
+ for _, dml := range dm.DMLs {
+ // Check if the sheet for this DatamapLine is in the set of sheets
+ if !contains(sheets, dml.Sheet) {
+ continue
+ }
+
+ sh, ok := wb.Sheet[dml.Sheet]
+ if !ok {
+ return nil, fmt.Errorf("sheet %s not found in Excel file", dml.Sheet)
+ }
+
+ col, row, err := xlsx.GetCoordsFromCellIDString(dml.CellRef)
+ if err != nil {
+ return nil, err
+ }
+ cell, err := sh.Cell(row, col)
+ if err != nil {
+ return nil, err
+ }
+ returnLines = append(returnLines, ReturnLine{
+ Sheet: dml.Sheet,
+ CellRef: dml.CellRef,
+ Value: cell.Value, // or cell.FormattedValue() if you need formatted values
+ })
+ }
+
+ // Here we create a new Return object with the name of the Excel file and the ReturnLines slice
+ // that we just populated
+ rtn, err := NewReturn(filepath.Base(filePath), dm, returnLines)
+ if err != nil {
+ return nil, err
+ }
+ return rtn, nil
+}
+
+// contains checks if a slice contains a given string
+func contains(slice []string, str string) bool {
+ for _, s := range slice {
+ if s == str {
+ return true
+ }
+ }
+ return false
+}
diff --git a/cmd/dbasik-api/return_test.go b/cmd/dbasik-api/return_test.go
new file mode 100644
index 0000000..c6a8664
--- /dev/null
+++ b/cmd/dbasik-api/return_test.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+ "reflect"
+ "testing"
+ "time"
+)
+
+func TestCreateNewReturn(t *testing.T) {
+ dm := &Datamap{
+ ID: 1,
+ Name: "test name",
+ Description: "test description",
+ Created: time.Now(),
+ DMLs: []DatamapLine{
+ {
+ ID: 1,
+ Key: "test key",
+ Sheet: "test sheet",
+ DataType: "test datatype",
+ CellRef: "test cellref",
+ },
+ },
+ }
+ // Call NewReturn with an empty []ReturnLine slice
+ rt, err := NewReturn("test name", dm, []ReturnLine{})
+ if err == nil {
+ t.Error("Expected an error when passing an empty []ReturnLine slice")
+ }
+
+ // Check if the error message is as expected
+ expectedErrorMsg := "ReturnLines must contain at least one ReturnLine"
+ if err != nil && err.Error() != expectedErrorMsg {
+ t.Errorf("Unexpected error message. Expected: %s, Got: %s", expectedErrorMsg, err.Error())
+ }
+
+ // Check if the returned Return struct is nil
+ if rt != nil {
+ t.Error("Expected a nil Return struct when an error occurs")
+ }
+}
+
+func TestNewReturnLine(t *testing.T) {
+ rl, err := NewReturnLine("stabs", "C1", "Knocker")
+ if err != nil {
+ t.Fatal(err)
+ }
+ if rl == nil {
+ t.Errorf("NewReturnLine() returned nil")
+ }
+ if rl.Sheet != "stabs" {
+ t.Errorf("NewReturnLine() returned wrong sheet")
+ }
+}
+
+func TestReturnLineCellRefFormat(t *testing.T) {
+ _, err := NewReturnLine("stabs", "CC", "Knocker")
+ if err != nil {
+ if err.Error() != "cellRef must be A1 format" {
+ t.Errorf("NewReturnLine() returned wrong error")
+ }
+ }
+}
+
+func TestValidateInputs(t *testing.T) {
+ // Happy path
+ err := validateInputs("Sheet1", "A1", "value")
+ if err != nil {
+ t.Errorf("validateInputs failed: %v", err)
+ }
+
+ // Missing sheet
+ err = validateInputs("", "A1", "value")
+ if err == nil {
+ t.Error("Expected error for missing sheet")
+ }
+ if err.Error() != "sheet parameter is required" {
+ t.Error("Expected error for missing sheet")
+ }
+ // Missing cellRef
+ err = validateInputs("Sheet1", "", "value")
+ if err == nil {
+ t.Error("cellRef parameter is required")
+ }
+
+ // Missing value
+ err = validateInputs("Sheet1", "A1", "")
+ if err == nil {
+ t.Error("value parameter is required")
+ }
+}
+
+func TestHelper_validateSpreadsheetCell(t *testing.T) {
+ if validateSpreadsheetCell("19") != false {
+ t.Errorf("Helper.validateSpreadsheetCell() did not return false")
+ }
+
+ if validateSpreadsheetCell("1") != false {
+ t.Errorf("Helper.validateSpreadsheetCell() did not return false")
+ }
+
+ if validateSpreadsheetCell("A10") != true {
+ t.Errorf("Helper.validateSpreadsheetCell() did not return true")
+ }
+}
+
+func TestParseXLSX(t *testing.T) {
+ tests := []struct {
+ name string
+ filePath string
+ dm *Datamap
+ want *Return
+ wantErr bool
+ }{
+ {
+ name: "Valid_Excel_file",
+ filePath: "../../testdata/valid_excel.xlsx",
+ dm: &Datamap{
+ DMLs: []DatamapLine{
+ {Sheet: "Sheet1", CellRef: "A1"},
+ {Sheet: "Sheet1", CellRef: "B1"},
+ {Sheet: "Sheet2", CellRef: "C1"},
+ },
+ },
+ want: &Return{
+ Name: "valid_excel.xlsx",
+ ReturnLines: []ReturnLine{
+ {Sheet: "Sheet1", CellRef: "A1", Value: "Value 1"},
+ {Sheet: "Sheet1", CellRef: "B1", Value: "Value 2"},
+ {Sheet: "Sheet2", CellRef: "C1", Value: "Value 3"},
+ },
+ },
+ wantErr: false,
+ },
+ // Add more test cases as needed
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got, err := ParseXLSX(tt.filePath, tt.dm)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("ParseXLSX() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+
+ if !reflect.DeepEqual(got.Name, tt.want.Name) {
+ t.Errorf("ParseXLSX() FileName = %v, want %v", got.Name, tt.want.Name)
+ }
+
+ if len(got.ReturnLines) != len(tt.want.ReturnLines) {
+ t.Errorf("ParseXLSX() ReturnLines length = %v, want %v", len(got.ReturnLines), len(tt.want.ReturnLines))
+ return
+ }
+
+ for i := range got.ReturnLines {
+ if got.ReturnLines[i].Sheet != tt.want.ReturnLines[i].Sheet {
+ t.Errorf("ParseXLSX() ReturnLines[%d].Sheet = %v, want %v", i, got.ReturnLines[i].Sheet, tt.want.ReturnLines[i].Sheet)
+ }
+ if got.ReturnLines[i].CellRef != tt.want.ReturnLines[i].CellRef {
+ t.Errorf("ParseXLSX() ReturnLines[%d].CellRef = %v, want %v", i, got.ReturnLines[i].CellRef, tt.want.ReturnLines[i].CellRef)
+ }
+ if got.ReturnLines[i].Value != tt.want.ReturnLines[i].Value {
+ t.Errorf("ParseXLSX() ReturnLines[%d].Value = %v, want %v", i, got.ReturnLines[i].Value, tt.want.ReturnLines[i].Value)
+ }
+ }
+ })
+ }
+}
diff --git a/datamap.csv b/datamap.csv
index 9a9e13c..1e0f0a8 100644
--- a/datamap.csv
+++ b/datamap.csv
@@ -1,3 +1,3 @@
Project/Programme Name,rtp_template,TEXT,E6
-Acronym,rtp_template,TEXT,E12
+Acronym,rtp_template,TEXT,E6
diff --git a/testdata/valid_excel.xlsx b/testdata/valid_excel.xlsx
new file mode 100644
index 0000000..b488693
--- /dev/null
+++ b/testdata/valid_excel.xlsx
Binary files differ