Case Study 2: Migrating File-Based Data to SQLite
Overview
PennyWise has been storing data in flat files since Chapter 13. Now it uses SQLite. But Rosa has a year of expense data in the old format. This case study walks through building a migration tool that reads the old file format and imports it into the new database — a common real-world requirement when upgrading an application's data layer.
The Old Format
PennyWise's flat-file format (from Chapter 29's project checkpoint) uses tab-separated values with a header:
PENNYWISE_V1
42
2025-01-15 Groceries Food & Dining 45.99
2025-01-16 Bus pass Transportation 65.00
...
Line 1 is a format identifier. Line 2 is the record count. Lines 3+ are tab-separated data rows.
Migration Strategy
- Read the old file and parse it into an array of records.
- Map categories to IDs. The old format uses category names; the new database uses category IDs. Create any missing categories in the database.
- Insert expenses using parameterized queries.
- Detect duplicates by checking if an expense with the same date, description, and amount already exists.
- Report results — how many imported, how many skipped (duplicates), how many errors.
Implementation
unit MigrationUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, sqlite3conn, sqldb;
type
{ Summary of one migration run. The caller owns ErrorMessages and must
  free it when done with the result. }
TMigrationResult = record
TotalRead: Integer;     { record count claimed by the file's header line }
Imported: Integer;      { rows successfully inserted into expenses }
Duplicates: Integer;    { rows skipped because a matching expense exists }
Errors: Integer;        { rows (or whole files) that failed to import }
ErrorMessages: TStringList; { human-readable detail for each error }
end;
{ Reads a PENNYWISE_V1 flat file and imports its rows into the database
  behind Conn/Trans; returns per-row counts and error details. }
function MigrateFromFlatFile(const FileName: string;
Conn: TSQLite3Connection; Trans: TSQLTransaction): TMigrationResult;
implementation
{ Returns the id of the category named CatName, inserting a new row into
  categories when none exists yet.  Runs entirely inside the caller's open
  transaction: nothing is committed here, so the migration as a whole
  remains all-or-nothing. }
function GetOrCreateCategory(Conn: TSQLite3Connection;
  Trans: TSQLTransaction; const CatName: string): Integer;
var
  Q: TSQLQuery;
begin
  Q := TSQLQuery.Create(nil);
  try
    Q.Database := Conn;
    Q.Transaction := Trans;
    { Try to find existing category }
    Q.SQL.Text := 'SELECT id FROM categories WHERE name = :name';
    Q.Params.ParamByName('name').AsString := CatName;
    Q.Open;
    if not Q.EOF then
    begin
      Result := Q.FieldByName('id').AsInteger;
      Q.Close;
      Exit;
    end;
    Q.Close;
    { Create new category }
    Q.SQL.Text := 'INSERT INTO categories (name) VALUES (:name)';
    Q.Params.ParamByName('name').AsString := CatName;
    Q.ExecSQL;
    { BUG FIX: no Trans.Commit here.  The insert is already visible within
      the open transaction (same connection), so last_insert_rowid() works
      without committing.  Committing mid-run would make partial imports
      permanent and defeat the single commit performed by the caller. }
    Q.SQL.Text := 'SELECT last_insert_rowid() as id';
    Q.Open;
    Result := Q.FieldByName('id').AsInteger;
    Q.Close;
  finally
    Q.Free;
  end;
end;
{ True when an expense with the same date and description and a
  (near-)equal amount already exists in the expenses table.  The amount
  comparison uses a small tolerance to sidestep floating-point noise. }
function IsDuplicate(Conn: TSQLite3Connection; Trans: TSQLTransaction;
  const ADate, ADesc: string; AAmount: Double): Boolean;
var
  Lookup: TSQLQuery;
begin
  Result := False;
  Lookup := TSQLQuery.Create(nil);
  try
    Lookup.Database := Conn;
    Lookup.Transaction := Trans;
    Lookup.SQL.Text :=
      'SELECT COUNT(*) as cnt FROM expenses ' +
      'WHERE date = :date AND description = :desc AND ' +
      ' ABS(amount - :amt) < 0.01';
    Lookup.Params.ParamByName('date').AsString := ADate;
    Lookup.Params.ParamByName('desc').AsString := ADesc;
    Lookup.Params.ParamByName('amt').AsFloat := AAmount;
    Lookup.Open;
    try
      Result := Lookup.FieldByName('cnt').AsInteger > 0;
    finally
      Lookup.Close;
    end;
  finally
    Lookup.Free;
  end;
end;
{ Imports expenses from a PENNYWISE_V1 flat file into the database.
  FileName - path to the old tab-separated export.
  Conn/Trans - open connection and transaction; the single commit at the
  end makes the import all-or-nothing.
  Returns a TMigrationResult whose ErrorMessages list the caller must free.
  Per-row problems (bad field count, bad amount, insert failure) are
  counted and logged, not fatal; only a missing file, a bad header or a
  bad record count abort the run. }
function MigrateFromFlatFile(const FileName: string;
  Conn: TSQLite3Connection; Trans: TSQLTransaction): TMigrationResult;
var
  F: TextFile;
  Line, Header, CountLine: string;
  Count, I: Integer;
  Parts: TStringArray;
  CatID: Integer;
  Amount: Double;
  InsertQ: TSQLQuery;
  FS: TFormatSettings;
begin
  Result.TotalRead := 0;
  Result.Imported := 0;
  Result.Duplicates := 0;
  Result.Errors := 0;
  Result.ErrorMessages := TStringList.Create;
  if not FileExists(FileName) then
  begin
    { Count this as an error so callers that gate on Errors > 0 show it. }
    Inc(Result.Errors);
    Result.ErrorMessages.Add('File not found: ' + FileName);
    Exit;
  end;
  { The file format always uses '.' as the decimal separator; parse with
    fixed settings so the import does not break under locales that use ','. }
  FS := DefaultFormatSettings;
  FS.DecimalSeparator := '.';
  InsertQ := TSQLQuery.Create(nil);
  try
    InsertQ.Database := Conn;
    InsertQ.Transaction := Trans;
    AssignFile(F, FileName);
    Reset(F);
    try
      { Read and validate header }
      ReadLn(F, Header);
      if Header <> 'PENNYWISE_V1' then
      begin
        Inc(Result.Errors);
        Result.ErrorMessages.Add('Invalid file format: ' + Header);
        { BUG FIX: no CloseFile here - the finally below closes the file;
          closing twice raised EInOutError on this path. }
        Exit;
      end;
      { Read the record count as text: ReadLn into an Integer raises
        EInOutError on a malformed line, which would crash the import. }
      ReadLn(F, CountLine);
      if not TryStrToInt(Trim(CountLine), Count) then
      begin
        Inc(Result.Errors);
        Result.ErrorMessages.Add('Invalid record count: ' + CountLine);
        Exit;
      end;
      Result.TotalRead := Count;
      { Process each record }
      for I := 1 to Count do
      begin
        if EOF(F) then Break;
        ReadLn(F, Line);
        Parts := Line.Split(#9);
        if Length(Parts) < 4 then
        begin
          Inc(Result.Errors);
          { I + 2 converts the record index to the physical line number
            (two header lines precede the data). }
          Result.ErrorMessages.Add(Format('Line %d: too few fields', [I + 2]));
          Continue;
        end;
        { Parse amount with the fixed '.'-decimal settings }
        if not TryStrToFloat(Parts[3], Amount, FS) then
        begin
          Inc(Result.Errors);
          Result.ErrorMessages.Add(Format('Line %d: invalid amount "%s"',
            [I + 2, Parts[3]]));
          Continue;
        end;
        { Check for duplicates }
        if IsDuplicate(Conn, Trans, Parts[0], Parts[1], Amount) then
        begin
          Inc(Result.Duplicates);
          Continue;
        end;
        { Get or create category }
        CatID := GetOrCreateCategory(Conn, Trans, Parts[2]);
        { Insert expense }
        try
          InsertQ.SQL.Text :=
            'INSERT INTO expenses (date, description, category_id, amount) ' +
            'VALUES (:date, :desc, :catid, :amount)';
          InsertQ.Params.ParamByName('date').AsString := Parts[0];
          InsertQ.Params.ParamByName('desc').AsString := Parts[1];
          InsertQ.Params.ParamByName('catid').AsInteger := CatID;
          InsertQ.Params.ParamByName('amount').AsFloat := Amount;
          InsertQ.ExecSQL;
          Inc(Result.Imported);
        except
          on E: Exception do
          begin
            Inc(Result.Errors);
            Result.ErrorMessages.Add(Format('Line %d: %s', [I + 2, E.Message]));
          end;
        end;
      end;
      { Single commit: either every inserted row lands, or none do. }
      Trans.Commit;
    finally
      CloseFile(F);
    end;
  finally
    InsertQ.Free;
  end;
end;
end.
Using the Migration Tool
{ Menu handler: asks the user for an old-format file, runs the migration
  and reports counts (plus error details, if any).  Frees the result's
  ErrorMessages list, which MigrateFromFlatFile hands over to the caller. }
procedure TfrmMain.miImportOldFileClick(Sender: TObject);
var
  Outcome: TMigrationResult;
begin
  if not OpenDialog1.Execute then
    Exit;
  Outcome := MigrateFromFlatFile(OpenDialog1.FileName, FConn, FTrans);
  try
    MessageDlg('Migration Complete',
      Format('Records read: %d' + LineEnding +
             'Imported: %d' + LineEnding +
             'Duplicates skipped: %d' + LineEnding +
             'Errors: %d',
             [Outcome.TotalRead, Outcome.Imported,
              Outcome.Duplicates, Outcome.Errors]),
      mtInformation, [mbOK], 0);
    { Show error details }
    if Outcome.Errors > 0 then
      ShowMessage('Errors:' + LineEnding + Outcome.ErrorMessages.Text);
    FQuery.Refresh;
    UpdateCharts;
  finally
    Outcome.ErrorMessages.Free;
  end;
end;
Lessons Learned
- Data migration is a real-world skill. Almost every application upgrade involves migrating old data to a new format.
- Defensive parsing handles malformed data gracefully — bad lines are logged and skipped rather than crashing the import.
- Duplicate detection prevents importing the same data twice if the user runs the migration again.
- Category mapping (name to ID) bridges the old format's text categories and the new format's normalized references.
- Transaction commit after the loop ensures all-or-nothing: either all records import or none do. For very large files, you might commit in batches.
- Error reporting tells the user exactly what happened — not just "migration complete" but counts and details.