diff options
author | Matthew Lemon <y@yulqen.org> | 2024-04-19 20:51:48 +0100 |
---|---|---|
committer | Matthew Lemon <y@yulqen.org> | 2024-04-19 20:51:48 +0100 |
commit | 04f78ef7a46ae4c4b8ab49153c16983c590783f5 (patch) | |
tree | 77825f91fb6175f300c2667be4c9a91b19aab417 | |
parent | 85d77655a515c120e5c0ba3c48c6be4eda1ac8f6 (diff) |
First parse of an Excel file
-rw-r--r-- | cmd/dbasik-api/datamaps.go | 22 | ||||
-rw-r--r-- | cmd/dbasik-api/datamaps_test.go | 80 | ||||
-rw-r--r-- | cmd/dbasik-api/handlers.go | 42 | ||||
-rw-r--r-- | cmd/dbasik-api/helpers.go | 12 | ||||
-rw-r--r-- | cmd/dbasik-api/return.go | 130 | ||||
-rw-r--r-- | cmd/dbasik-api/return_test.go | 168 | ||||
-rw-r--r-- | datamap.csv | 2 | ||||
-rw-r--r-- | testdata/valid_excel.xlsx | bin | 0 -> 5870 bytes |
8 files changed, 387 insertions, 69 deletions
diff --git a/cmd/dbasik-api/datamaps.go b/cmd/dbasik-api/datamaps.go index 1121581..65a8808 100644 --- a/cmd/dbasik-api/datamaps.go +++ b/cmd/dbasik-api/datamaps.go @@ -23,7 +23,7 @@ import ( "time" ) -// A custom err to return from our Get() method when looking up a datamap +// ErrRecordNotFound A custom err to return from our Get() method when looking up a Datamap // that doesn't exist var ( ErrRecordNotFound = errors.New("record not found") @@ -36,35 +36,37 @@ type Models struct { DatamapLines datamapLineModel } -// datamapLine holds the data parsed from each line of a submitted datamap CSV file. +// DatamapLine holds the data parsed from each line of a submitted Datamap CSV file. // The fields need to be exported otherwise they won't be included when encoding // the struct to json. -type datamapLine struct { +type DatamapLine struct { ID int64 `json:"id"` Key string `json:"key"` Sheet string `json:"sheet"` DataType string `json:"datatype"` - Cellref string `json:"cellref"` + CellRef string `json:"cellref"` } type datamapLineModel struct { DB *sql.DB } -// datamap includes a slice of datamapLine objects alongside header metadata -type datamap struct { +// Datamap includes a slice of DatamapLine objects alongside header metadata +type Datamap struct { ID int64 `json:"id"` Name string `json:"name"` Description string `json:"description"` Created time.Time `json:"created"` - DMLs []datamapLine `json:"datamap_lines"` + DMLs []DatamapLine `json:"datamap_lines"` } type datamapModel struct { DB *sql.DB } -func GetSheetsFromDM(dm datamap) []string { +// GetSheetsFromDM extracts a set of sheet names from a Datamap struct +func GetSheetsFromDM(dm Datamap) []string { + // this is basically how sets are done in Go - see https://www.sohamkamani.com/golang/sets/ sheets := map[string]struct{}{} for _, dml := range dm.DMLs { sheets[dml.Sheet] = struct{}{} @@ -83,7 +85,7 @@ func NewModels(db *sql.DB) Models { } } -func (m *datamapLineModel) Insert(dm datamap, dmls []datamapLine) (int, error) { +func (m *datamapLineModel) Insert(dm Datamap, dmls []DatamapLine) (int, error) { var datamapID int64 tx, err := m.DB.Begin() if err != nil { @@ -111,7 +113,7 @@ func (m *datamapLineModel) Insert(dm datamap, dmls []datamapLine) (int, error) { line.Key, line.Sheet, line.DataType, - line.Cellref) + line.CellRef) if err != nil { tx.Rollback() return 0, err diff --git a/cmd/dbasik-api/datamaps_test.go b/cmd/dbasik-api/datamaps_test.go index 428f301..827a054 100644 --- a/cmd/dbasik-api/datamaps_test.go +++ b/cmd/dbasik-api/datamaps_test.go @@ -6,45 +6,53 @@ import ( "time" ) -func TestReadDML(t *testing.T) { - dm := datamap{ - ID: 0, - Name: "Test Name", - Description: "Test description", - Created: time.Now(), - DMLs: []datamapLine{ - { - ID: 1, - Key: "Test Key", - Sheet: "Test Sheet", - DataType: "TEXT", - Cellref: "A10", - }, - { - ID: 2, - Key: "Test Key 2", - Sheet: "Test Sheet", - DataType: "TEXT", - Cellref: "A11", - }, - { - ID: 3, - Key: "Test Key 3", - Sheet: "Test Sheet 2", - DataType: "TEXT", - Cellref: "A12", +func TestGetSheetsFromDM(t *testing.T) { + testCases := []struct { + name string + datamap Datamap + expected []string + }{ + { + name: "Extract unique sheet names", + datamap: Datamap{ + ID: 0, + Name: "Test Name", + Description: "Test description", + Created: time.Now(), + DMLs: []DatamapLine{ + { + ID: 1, + Key: "Test Key", + Sheet: "Test Sheet", + DataType: "TEXT", + CellRef: "A10", + }, + { + ID: 2, + Key: "Test Key 2", + Sheet: "Test Sheet", + DataType: "TEXT", + CellRef: "A11", + }, + { + ID: 3, + Key: "Test Key 3", + Sheet: "Test Sheet 2", + DataType: "TEXT", + CellRef: "A12", + }, + }, }, + expected: []string{"Test Sheet", "Test Sheet 2"}, }, } - got := GetSheetsFromDM(dm) - if !slices.Contains(got, "Test Sheet") { - t.Errorf("expected to find Test Sheet in %v but didn't find it", got) - } - if !slices.Contains(got, "Test Sheet 2") { - t.Errorf("expected to find Test Sheet in %v but didn't find it", got) - } - if slices.Contains(got, "Test Sheet 3") { - t.Errorf("expected to find Test Sheet in %v but didn't find it", got) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := GetSheetsFromDM(tc.datamap) + if !slices.Equal(got, tc.expected) { + t.Errorf("GetSheetsFromDM(%v) = %v, expected %v", tc.datamap, got, tc.expected) + } + }) } } diff --git a/cmd/dbasik-api/handlers.go b/cmd/dbasik-api/handlers.go index c79de7e..b6b4443 100644 --- a/cmd/dbasik-api/handlers.go +++ b/cmd/dbasik-api/handlers.go @@ -53,7 +53,7 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque // parse the csv reader := csv.NewReader(file) - var dmls []datamapLine + var dmls []DatamapLine for { line, err := reader.Read() @@ -69,16 +69,16 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque return } - dmls = append(dmls, datamapLine{ + dmls = append(dmls, DatamapLine{ Key: line[0], Sheet: line[1], DataType: line[2], - Cellref: line[3], + CellRef: line[3], }) } - dm := datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} + dm := Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} - // Parse the XLSX file based on the datamap... + // Parse the XLSX file based on the Datamap... // open an existing file wb, err := xlsx.OpenFile(dst.Name()) if err != nil { @@ -95,7 +95,7 @@ func (app *application) createReturnHandler(w http.ResponseWriter, r *http.Reque //Here is where we parse our files. - err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil) + err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil) if err != nil { app.logger.Debug("writing out csv", "err", err) app.serverErrorResponse(w, r, err) @@ -128,8 +128,8 @@ func (app *application) createDatamapHandler(w http.ResponseWriter, r *http.Requ // parse the csv reader := csv.NewReader(file) - var dmls []datamapLine - var dm datamap + var dmls []DatamapLine + var dm Datamap for { line, err := reader.Read() @@ -145,16 +145,16 @@ func (app *application) createDatamapHandler(w http.ResponseWriter, r *http.Requ return } - dmls = append(dmls, datamapLine{ + dmls = append(dmls, DatamapLine{ Key: line[0], Sheet: line[1], DataType: line[2], - Cellref: line[3], + CellRef: line[3], }) } - dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} + dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} - err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil) + err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil) if err != nil { app.logger.Debug("writing out csv", "err", err) app.serverErrorResponse(w, r, err) @@ -185,8 +185,8 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques // parse the csv reader := csv.NewReader(file) - var dmls []datamapLine - var dm datamap + var dmls []DatamapLine + var dm Datamap for { line, err := reader.Read() @@ -202,16 +202,16 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques return } - dmls = append(dmls, datamapLine{ + dmls = append(dmls, DatamapLine{ ID: 0, Key: line[0], Sheet: line[1], DataType: line[2], - Cellref: line[3], + CellRef: line[3], }) } - dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} + dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} // save to the database _, err = app.models.DatamapLines.Insert(dm, dmls) @@ -224,9 +224,9 @@ func (app *application) saveDatamapHandler(w http.ResponseWriter, r *http.Reques func (app *application) getJSONForDatamap(w http.ResponseWriter, r *http.Request) { // Get the DM out of the database - // dm = datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} + // dm = Datamap{Name: dmName, Description: dmDesc, Created: time.Now(), DMLs: dmls} - // err = app.writeJSONPretty(w, http.StatusOK, envelope{"datamap": dm}, nil) + // err = app.writeJSONPretty(w, http.StatusOK, envelope{"Datamap": dm}, nil) // if err != nil { // app.logger.Debug("writing out csv", "err", err) // app.serverErrorResponse(w, r, err) @@ -242,11 +242,11 @@ func (app *application) showDatamapHandler(w http.ResponseWriter, r *http.Reques if err != nil || id_int < 1 { app.notFoundResponse(w, r) } - fmt.Fprintf(w, "show the details for datamap %d\n", id_int) + fmt.Fprintf(w, "show the details for Datamap %d\n", id_int) } func (app *application) createDatamapLine(w http.ResponseWriter, r *http.Request) { - var input datamapLine + var input DatamapLine err := json.NewDecoder(r.Body).Decode(&input) if err != nil { app.errorResponse(w, r, http.StatusBadRequest, err.Error()) diff --git a/cmd/dbasik-api/helpers.go b/cmd/dbasik-api/helpers.go index 3b07dc2..68d9f66 100644 --- a/cmd/dbasik-api/helpers.go +++ b/cmd/dbasik-api/helpers.go @@ -20,9 +20,19 @@ package main import ( "encoding/json" "net/http" + "regexp" ) -// We want this so that our JSON is nested under a key at the top, e.g. "datamap:"... +// validateSpreadsheetCell checks that the cellRef is in a valid format +func validateSpreadsheetCell(cellRef string) bool { + pattern := `^[A-Z]+[1-9][0-9]*$` + + regExp := regexp.MustCompile(pattern) + + return regExp.MatchString(cellRef) +} + +// We want this so that our JSON is nested under a key at the top, e.g. "Datamap:"... type envelope map[string]interface{} // writeJSON)Pretty() helper for sending responses - pretty prints output. This takes the destination http.ResponseWriter, the diff --git a/cmd/dbasik-api/return.go b/cmd/dbasik-api/return.go new file mode 100644 index 0000000..294c960 --- /dev/null +++ b/cmd/dbasik-api/return.go @@ -0,0 +1,130 @@ +package main + +import ( + "fmt" + "github.com/tealeg/xlsx/v3" + "path/filepath" +) + +type ReturnLine struct { + Sheet string + CellRef string + Value string +} + +type Return struct { + Name string + ReturnLines []ReturnLine +} + +// NewReturnLine creates a new ReturnLine object +func NewReturnLine(sheet, cellRef, value string) (*ReturnLine, error) { + if err := validateInputs(sheet, cellRef, value); err != nil { + return nil, err + } + + if !validateSpreadsheetCell(cellRef) { + return nil, fmt.Errorf("cellRef must be A1 format") + } + + return &ReturnLine{ + Sheet: sheet, + CellRef: cellRef, + Value: value, + }, nil +} + +func validateInputs(sheet, cellRef, value string) error { + if sheet == "" { + return fmt.Errorf("sheet parameter is required") + } + if cellRef == "" { + return fmt.Errorf("cellRef parameter is required") + } + if value == "" { + return fmt.Errorf("value parameter is required") + } + return nil +} + +func NewReturn(name string, dm *Datamap, returnLines []ReturnLine) (*Return, error) { + if len(returnLines) == 0 { + return nil, fmt.Errorf("ReturnLines must contain at least one ReturnLine") + } + + return &Return{ + Name: name, + ReturnLines: returnLines, + }, nil +} + +//func cellVisitor(c *xlsx.Cell) error { +// value, err := c.FormattedValue() +// if err != nil { +// fmt.Println(err.Error()) +// } else { +// fmt.Printf("Sheet: %s, Cell value: %s\n", c.Row.Sheet.Name, value) +// } +// return err +//} + +//func rowVisitor(r *xlsx.Row) error { +// return r.ForEachCell(cellVisitor, xlsx.SkipEmptyCells) +//} + +func ParseXLSX(filePath string, dm *Datamap) (*Return, error) { + // Use tealeg/xlsx to parse the Excel file + wb, err := xlsx.OpenFile(filePath) + if err != nil { + return nil, err + } + + // Get the set of sheets from the Datamap + sheets := GetSheetsFromDM(*dm) + + // Loop through all DatamapLines + returnLines := []ReturnLine{} + for _, dml := range dm.DMLs { + // Check if the sheet for this DatamapLine is in the set of sheets + if !contains(sheets, dml.Sheet) { + continue + } + + sh, ok := wb.Sheet[dml.Sheet] + if !ok { + return nil, fmt.Errorf("sheet %s not found in Excel file", dml.Sheet) + } + + col, row, err := xlsx.GetCoordsFromCellIDString(dml.CellRef) + if err != nil { + return nil, err + } + cell, err := sh.Cell(row, col) + if err != nil { + return nil, err + } + returnLines = append(returnLines, ReturnLine{ + Sheet: dml.Sheet, + CellRef: dml.CellRef, + Value: cell.Value, // or cell.FormattedValue() if you need formatted values + }) + } + + // Here we create a new Return object with the name of the Excel file and the ReturnLines slice + // that we just populated + rtn, err := NewReturn(filepath.Base(filePath), dm, returnLines) + if err != nil { + return nil, err + } + return rtn, nil +} + +// contains checks if a slice contains a given string +func contains(slice []string, str string) bool { + for _, s := range slice { + if s == str { + return true + } + } + return false +} diff --git a/cmd/dbasik-api/return_test.go b/cmd/dbasik-api/return_test.go new file mode 100644 index 0000000..c6a8664 --- /dev/null +++ b/cmd/dbasik-api/return_test.go @@ -0,0 +1,168 @@ +package main + +import ( + "reflect" + "testing" + "time" +) + +func TestCreateNewReturn(t *testing.T) { + dm := &Datamap{ + ID: 1, + Name: "test name", + Description: "test description", + Created: time.Now(), + DMLs: []DatamapLine{ + { + ID: 1, + Key: "test key", + Sheet: "test sheet", + DataType: "test datatype", + CellRef: "test cellref", + }, + }, + } + // Call NewReturn with an empty []ReturnLine slice + rt, err := NewReturn("test name", dm, []ReturnLine{}) + if err == nil { + t.Error("Expected an error when passing an empty []ReturnLine slice") + } + + // Check if the error message is as expected + expectedErrorMsg := "ReturnLines must contain at least one ReturnLine" + if err != nil && err.Error() != expectedErrorMsg { + t.Errorf("Unexpected error message. Expected: %s, Got: %s", expectedErrorMsg, err.Error()) + } + + // Check if the returned Return struct is nil + if rt != nil { + t.Error("Expected a nil Return struct when an error occurs") + } +} + +func TestNewReturnLine(t *testing.T) { + rl, err := NewReturnLine("stabs", "C1", "Knocker") + if err != nil { + t.Fatal(err) + } + if rl == nil { + t.Errorf("NewReturnLine() returned nil") + } + if rl.Sheet != "stabs" { + t.Errorf("NewReturnLine() returned wrong sheet") + } +} + +func TestReturnLineCellRefFormat(t *testing.T) { + _, err := NewReturnLine("stabs", "CC", "Knocker") + if err != nil { + if err.Error() != "cellRef must be A1 format" { + t.Errorf("NewReturnLine() returned wrong error") + } + } +} + +func TestValidateInputs(t *testing.T) { + // Happy path + err := validateInputs("Sheet1", "A1", "value") + if err != nil { + t.Errorf("validateInputs failed: %v", err) + } + + // Missing sheet + err = validateInputs("", "A1", "value") + if err == nil { + t.Error("Expected error for missing sheet") + } + if err.Error() != "sheet parameter is required" { + t.Error("Expected error for missing sheet") + } + // Missing cellRef + err = validateInputs("Sheet1", "", "value") + if err == nil { + t.Error("cellRef parameter is required") + } + + // Missing value + err = validateInputs("Sheet1", "A1", "") + if err == nil { + t.Error("value parameter is required") + } +} + +func TestHelper_validateSpreadsheetCell(t *testing.T) { + if validateSpreadsheetCell("19") != false { + t.Errorf("Helper.validateSpreadsheetCell() did not return false") + } + + if validateSpreadsheetCell("1") != false { + t.Errorf("Helper.validateSpreadsheetCell() did not return false") + } + + if validateSpreadsheetCell("A10") != true { + t.Errorf("Helper.validateSpreadsheetCell() did not return true") + } +} + +func TestParseXLSX(t *testing.T) { + tests := []struct { + name string + filePath string + dm *Datamap + want *Return + wantErr bool + }{ + { + name: "Valid_Excel_file", + filePath: "../../testdata/valid_excel.xlsx", + dm: &Datamap{ + DMLs: []DatamapLine{ + {Sheet: "Sheet1", CellRef: "A1"}, + {Sheet: "Sheet1", CellRef: "B1"}, + {Sheet: "Sheet2", CellRef: "C1"}, + }, + }, + want: &Return{ + Name: "valid_excel.xlsx", + ReturnLines: []ReturnLine{ + {Sheet: "Sheet1", CellRef: "A1", Value: "Value 1"}, + {Sheet: "Sheet1", CellRef: "B1", Value: "Value 2"}, + {Sheet: "Sheet2", CellRef: "C1", Value: "Value 3"}, + }, + }, + wantErr: false, + }, + // Add more test cases as needed + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseXLSX(tt.filePath, tt.dm) + if (err != nil) != tt.wantErr { + t.Errorf("ParseXLSX() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !reflect.DeepEqual(got.Name, tt.want.Name) { + t.Errorf("ParseXLSX() FileName = %v, want %v", got.Name, tt.want.Name) + } + + if len(got.ReturnLines) != len(tt.want.ReturnLines) { + t.Errorf("ParseXLSX() ReturnLines length = %v, want %v", len(got.ReturnLines), len(tt.want.ReturnLines)) + return + } + + for i := range got.ReturnLines { + if got.ReturnLines[i].Sheet != tt.want.ReturnLines[i].Sheet { + t.Errorf("ParseXLSX() ReturnLines[%d].Sheet = %v, want %v", i, got.ReturnLines[i].Sheet, tt.want.ReturnLines[i].Sheet) + } + if got.ReturnLines[i].CellRef != tt.want.ReturnLines[i].CellRef { + t.Errorf("ParseXLSX() ReturnLines[%d].CellRef = %v, want %v", i, got.ReturnLines[i].CellRef, tt.want.ReturnLines[i].CellRef) + } + if got.ReturnLines[i].Value != tt.want.ReturnLines[i].Value { + t.Errorf("ParseXLSX() ReturnLines[%d].Value = %v, want %v", i, got.ReturnLines[i].Value, tt.want.ReturnLines[i].Value) + } + } + }) + } +} diff --git a/datamap.csv b/datamap.csv index 9a9e13c..1e0f0a8 100644 --- a/datamap.csv +++ b/datamap.csv @@ -1,3 +1,3 @@ Project/Programme Name,rtp_template,TEXT,E6 -Acronym,rtp_template,TEXT,E12 +Acronym,rtp_template,TEXT,E6 diff --git a/testdata/valid_excel.xlsx b/testdata/valid_excel.xlsx Binary files differnew file mode 100644 index 0000000..b488693 --- /dev/null +++ b/testdata/valid_excel.xlsx |