Case Study 2: Migrating File-Based Data to SQLite

Overview

PennyWise has been storing data in flat files since Chapter 13. Now it uses SQLite. But Rosa has a year of expense data in the old format. This case study walks through building a migration tool that reads the old file format and imports it into the new database — a common real-world requirement when upgrading an application's data layer.


The Old Format

PennyWise's flat-file format (from Chapter 29's project checkpoint) uses tab-separated values with a header:

PENNYWISE_V1
42
2025-01-15  Groceries   Food & Dining   45.99
2025-01-16  Bus pass    Transportation  65.00
...

Line 1 is a format identifier. Line 2 is the record count. Lines 3+ are tab-separated data rows.


Migration Strategy

  1. Read the old file and parse it into an array of records.
  2. Map categories to IDs. The old format uses category names; the new database uses category IDs. Create any missing categories in the database.
  3. Insert expenses using parameterized queries.
  4. Detect duplicates by checking if an expense with the same date, description, and amount already exists.
  5. Report results — how many imported, how many skipped (duplicates), how many errors.

Implementation

unit MigrationUnit;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils, sqlite3conn, sqldb;

type
  TMigrationResult = record
    TotalRead: Integer;
    Imported: Integer;
    Duplicates: Integer;
    Errors: Integer;
    ErrorMessages: TStringList;
  end;

function MigrateFromFlatFile(const FileName: string;
  Conn: TSQLite3Connection; Trans: TSQLTransaction): TMigrationResult;

implementation

function GetOrCreateCategory(Conn: TSQLite3Connection;
  Trans: TSQLTransaction; const CatName: string): Integer;
var
  Q: TSQLQuery;
begin
  Q := TSQLQuery.Create(nil);
  try
    Q.Database := Conn;
    Q.Transaction := Trans;

    { Try to find existing category }
    Q.SQL.Text := 'SELECT id FROM categories WHERE name = :name';
    Q.Params.ParamByName('name').AsString := CatName;
    Q.Open;
    if not Q.EOF then
    begin
      Result := Q.FieldByName('id').AsInteger;
      Q.Close;
      Exit;
    end;
    Q.Close;

    { Create new category }
    Q.SQL.Text := 'INSERT INTO categories (name) VALUES (:name)';
    Q.Params.ParamByName('name').AsString := CatName;
    Q.ExecSQL;
    Trans.Commit;

    { Get the new ID }
    Q.SQL.Text := 'SELECT last_insert_rowid() as id';
    Q.Open;
    Result := Q.FieldByName('id').AsInteger;
    Q.Close;
  finally
    Q.Free;
  end;
end;

function IsDuplicate(Conn: TSQLite3Connection; Trans: TSQLTransaction;
  const ADate, ADesc: string; AAmount: Double): Boolean;
var
  Q: TSQLQuery;
begin
  Q := TSQLQuery.Create(nil);
  try
    Q.Database := Conn;
    Q.Transaction := Trans;
    Q.SQL.Text :=
      'SELECT COUNT(*) as cnt FROM expenses ' +
      'WHERE date = :date AND description = :desc AND ' +
      '      ABS(amount - :amt) < 0.01';
    Q.Params.ParamByName('date').AsString := ADate;
    Q.Params.ParamByName('desc').AsString := ADesc;
    Q.Params.ParamByName('amt').AsFloat := AAmount;
    Q.Open;
    Result := Q.FieldByName('cnt').AsInteger > 0;
    Q.Close;
  finally
    Q.Free;
  end;
end;

function MigrateFromFlatFile(const FileName: string;
  Conn: TSQLite3Connection; Trans: TSQLTransaction): TMigrationResult;
var
  F: TextFile;
  Line, Header: string;
  Count, I: Integer;
  Parts: TStringArray;
  CatID: Integer;
  Amount: Double;
  InsertQ: TSQLQuery;
begin
  Result.TotalRead := 0;
  Result.Imported := 0;
  Result.Duplicates := 0;
  Result.Errors := 0;
  Result.ErrorMessages := TStringList.Create;

  if not FileExists(FileName) then
  begin
    Result.ErrorMessages.Add('File not found: ' + FileName);
    Exit;
  end;

  InsertQ := TSQLQuery.Create(nil);
  try
    InsertQ.Database := Conn;
    InsertQ.Transaction := Trans;

    AssignFile(F, FileName);
    Reset(F);
    try
      { Read and validate header }
      ReadLn(F, Header);
      if Header <> 'PENNYWISE_V1' then
      begin
        Result.ErrorMessages.Add('Invalid file format: ' + Header);
        CloseFile(F);
        Exit;
      end;

      ReadLn(F, Count);
      Result.TotalRead := Count;

      { Process each record }
      for I := 1 to Count do
      begin
        if EOF(F) then Break;
        ReadLn(F, Line);
        Parts := Line.Split(#9);

        if Length(Parts) < 4 then
        begin
          Inc(Result.Errors);
          Result.ErrorMessages.Add(Format('Line %d: too few fields', [I + 2]));
          Continue;
        end;

        { Parse amount }
        if not TryStrToFloat(Parts[3], Amount) then
        begin
          Inc(Result.Errors);
          Result.ErrorMessages.Add(Format('Line %d: invalid amount "%s"',
            [I + 2, Parts[3]]));
          Continue;
        end;

        { Check for duplicates }
        if IsDuplicate(Conn, Trans, Parts[0], Parts[1], Amount) then
        begin
          Inc(Result.Duplicates);
          Continue;
        end;

        { Get or create category }
        CatID := GetOrCreateCategory(Conn, Trans, Parts[2]);

        { Insert expense }
        try
          InsertQ.SQL.Text :=
            'INSERT INTO expenses (date, description, category_id, amount) ' +
            'VALUES (:date, :desc, :catid, :amount)';
          InsertQ.Params.ParamByName('date').AsString := Parts[0];
          InsertQ.Params.ParamByName('desc').AsString := Parts[1];
          InsertQ.Params.ParamByName('catid').AsInteger := CatID;
          InsertQ.Params.ParamByName('amount').AsFloat := Amount;
          InsertQ.ExecSQL;
          Inc(Result.Imported);
        except
          on E: Exception do
          begin
            Inc(Result.Errors);
            Result.ErrorMessages.Add(Format('Line %d: %s', [I + 2, E.Message]));
          end;
        end;
      end;

      Trans.Commit;
    finally
      CloseFile(F);
    end;
  finally
    InsertQ.Free;
  end;
end;

end.

Using the Migration Tool

procedure TfrmMain.miImportOldFileClick(Sender: TObject);
var
  MResult: TMigrationResult;
begin
  if OpenDialog1.Execute then
  begin
    MResult := MigrateFromFlatFile(OpenDialog1.FileName, FConn, FTrans);
    try
      MessageDlg('Migration Complete',
        Format('Records read: %d' + LineEnding +
               'Imported: %d' + LineEnding +
               'Duplicates skipped: %d' + LineEnding +
               'Errors: %d',
               [MResult.TotalRead, MResult.Imported,
                MResult.Duplicates, MResult.Errors]),
        mtInformation, [mbOK], 0);

      if MResult.Errors > 0 then
      begin
        { Show error details }
        ShowMessage('Errors:' + LineEnding + MResult.ErrorMessages.Text);
      end;

      FQuery.Refresh;
      UpdateCharts;
    finally
      MResult.ErrorMessages.Free;
    end;
  end;
end;

Lessons Learned

  1. Data migration is a real-world skill. Almost every application upgrade involves migrating old data to a new format.
  2. Defensive parsing handles malformed data gracefully — bad lines are logged, not crashes.
  3. Duplicate detection prevents importing the same data twice if the user runs the migration again.
  4. Category mapping (name to ID) bridges the old format's text categories and the new format's normalized references.
  5. Transaction commit after the loop ensures all-or-nothing: either all records import or none do. For very large files, you might commit in batches.
  6. Error reporting tells the user exactly what happened — not just "migration complete" but counts and details.