Case Study 1: Robust File Processing Pipeline

The Scenario

Rosa Martinelli imports bank statements into PennyWise. Her bank exports CSV files, but the files are not always clean: some rows have missing fields, some have currency symbols in the amount column, some have dates in inconsistent formats, and occasionally the file is truncated (the bank's export process sometimes fails mid-file).

Rosa needs a file processing pipeline that:

  1. Opens the file safely (handling "file not found" and permission errors)
  2. Reads and parses each line (handling format errors per line without aborting the whole import)
  3. Validates each parsed record (rejecting records with invalid data)
  4. Reports a summary of what was imported and what was skipped
  5. Never leaves files open or data in an inconsistent state

This case study builds such a pipeline using exception handling at every level.

The Implementation

Custom Exceptions

unit BankImport.Exceptions;

{$mode objfpc}{$H+}

interface

uses
  SysUtils;

type
  { Common ancestor of every exception declared in this unit, so a caller
    can catch all import-related failures with a single handler. }
  EImportError = class(Exception);

  { File-level failure. Declares no extra state beyond the inherited
    exception message. }
  EFileAccessError = class(EImportError);

  { A single input line could not be parsed. Carries the line number and
    the raw line text alongside the formatted message. }
  EParseError = class(EImportError)
  private
    FLineNumber: Integer;
    FLineContent: String;
  public
    { Builds the message 'Line <n>: <AMessage> (content: "<ALineContent>")'
      and stores the line number and content in the read-only properties. }
    constructor Create(ALineNumber: Integer; const ALineContent, AMessage: String);
    { Line number supplied to the constructor. }
    property LineNumber: Integer read FLineNumber;
    { Raw line text supplied to the constructor. }
    property LineContent: String read FLineContent;
  end;

  { A record was rejected by validation. Carries the name of the
    offending field. }
  EValidationError = class(EImportError)
  private
    FFieldName: String;
  public
    { Builds the message 'Validation failed for <AFieldName>: <AMessage>'
      and stores the field name in the read-only property. }
    constructor Create(const AFieldName, AMessage: String);
    { Field name supplied to the constructor. }
    property FieldName: String read FFieldName;
  end;

implementation

{ Creates a parse error for one input line.
  ALineNumber  - number of the offending line
  ALineContent - raw text of the offending line
  AMessage     - short description of what was wrong
  The exception message combines all three; the line number and content
  are also kept in their own properties. }
constructor EParseError.Create(ALineNumber: Integer;
  const ALineContent, AMessage: String);
begin
  { Record the context first, then let the ancestor store the message. }
  FLineContent := ALineContent;
  FLineNumber := ALineNumber;
  inherited Create(Format('Line %d: %s (content: "%s")',
    [ALineNumber, AMessage, ALineContent]));
end;

{ Creates a validation error for one field.
  AFieldName - name of the field that failed validation
  AMessage   - explanation of the failure
  The field name is embedded in the message and kept in FieldName. }
constructor EValidationError.Create(const AFieldName, AMessage: String);
begin
  FFieldName := AFieldName;
  inherited Create(Format('Validation failed for %s: %s',
    [AFieldName, AMessage]));
end;

end.

The Import Record

unit BankImport.Types;

{$mode objfpc}{$H+}

interface

uses
  SysUtils;

type
  { One transaction parsed from a line of the bank's CSV export. }
  TBankTransaction = record
    Date: TDateTime;       { Transaction date }
    Description: String;   { Free-text description }
    Amount: Currency;      { Transaction amount }
    Category: String;      { Category label }
    Reference: String;     { Reference text; may be empty }
  end;

  { Summary returned by an import run. }
  TImportResult = record
    TotalLines: Integer;             { Number of lines counted in the file }
    SuccessCount: Integer;           { Number of lines imported successfully }
    SkippedCount: Integer;           { Number of lines skipped }
    ErrorMessages: array of String;  { One message per reported problem }
  end;

implementation

end.

The Line Parser

unit BankImport.Parser;

{$mode objfpc}{$H+}

interface

uses
  SysUtils, BankImport.Types, BankImport.Exceptions;

function ParseBankLine(const ALine: String; ALineNumber: Integer): TBankTransaction;

implementation

{ Returns S with every character removed except the digits 0-9, the
  period and the minus sign. Used to strip currency symbols, spaces and
  other decoration from an amount field before numeric conversion.
  Surviving characters keep their original order. }
function CleanAmount(const S: String): String;
const
  AmountChars = ['0'..'9', '.', '-'];
var
  Ch: Char;
begin
  Result := '';
  for Ch in S do
  begin
    if not (Ch in AmountChars) then
      Continue;  { drop anything that cannot be part of a number }
    Result := Result + Ch;
  end;
end;

{ Parses one comma-separated statement line into a TBankTransaction.

  Expected columns: date, description, amount, category and an optional
  fifth column holding a reference. Every field is trimmed.

  ALine       - raw line text as read from the file
  ALineNumber - number of the line, used only for error reporting

  Raises EParseError (with line number and content) when the line is
  blank, has fewer than four columns, has an unparseable date, an empty
  description, or an unparseable amount.

  Limitations: fields are split on every comma, so quoted fields that
  contain commas are not supported. The date is converted with StrToDate,
  which follows the process locale's date format. }
function ParseBankLine(const ALine: String; ALineNumber: Integer): TBankTransaction;
var
  Parts: TStringArray;
  CleanAmt: String;
  AmountFormat: TFormatSettings;
begin
  if Trim(ALine) = '' then
    raise EParseError.Create(ALineNumber, ALine, 'Empty line');

  Parts := ALine.Split([',']);

  if Length(Parts) < 4 then
    raise EParseError.Create(ALineNumber, ALine,
      Format('Expected 4+ columns, found %d', [Length(Parts)]));

  { Parse date }
  try
    Result.Date := StrToDate(Trim(Parts[0]));
  except
    on E: EConvertError do
      raise EParseError.Create(ALineNumber, ALine,
        'Invalid date format: "' + Trim(Parts[0]) + '"');
  end;

  { Parse description }
  Result.Description := Trim(Parts[1]);
  if Result.Description = '' then
    raise EParseError.Create(ALineNumber, ALine, 'Empty description');

  { Parse amount — strip currency symbols }
  CleanAmt := CleanAmount(Trim(Parts[2]));

  { CleanAmount keeps '.' and drops ',', so the cleaned text always uses
    a period as the decimal point. Convert with explicit format settings
    instead of the process locale; otherwise every amount is rejected on
    systems whose decimal separator is a comma. }
  AmountFormat := DefaultFormatSettings;
  AmountFormat.DecimalSeparator := '.';
  AmountFormat.ThousandSeparator := ',';
  try
    Result.Amount := StrToCurr(CleanAmt, AmountFormat);
  except
    on E: EConvertError do
      raise EParseError.Create(ALineNumber, ALine,
        'Invalid amount: "' + Trim(Parts[2]) + '"');
  end;

  { Parse category }
  Result.Category := Trim(Parts[3]);

  { Reference is optional }
  if Length(Parts) >= 5 then
    Result.Reference := Trim(Parts[4])
  else
    Result.Reference := '';
end;

end.

The Validator

unit BankImport.Validator;

{$mode objfpc}{$H+}

interface

uses
  SysUtils, BankImport.Types, BankImport.Exceptions;

procedure ValidateTransaction(const ATrans: TBankTransaction);

implementation

{ Checks a parsed transaction against the import's business rules and
  raises EValidationError for the first rule that fails. Rules are
  checked in this order:
    1. Amount must be non-zero.
    2. Date must not be later than the current date and time.
    3. Date must not be earlier than 1 January 2000.
    4. Description must be at least 2 characters long.
    5. Category must not be empty.
  Returns normally when every rule passes. }
procedure ValidateTransaction(const ATrans: TBankTransaction);
begin
  if ATrans.Amount = 0 then
    raise EValidationError.Create('Amount', 'Amount cannot be zero')
  else if ATrans.Date > Now then
    raise EValidationError.Create('Date',
      'Transaction date is in the future: ' + DateToStr(ATrans.Date))
  else if ATrans.Date < EncodeDate(2000, 1, 1) then
    raise EValidationError.Create('Date',
      'Transaction date is before year 2000: ' + DateToStr(ATrans.Date))
  else if Length(ATrans.Description) < 2 then
    raise EValidationError.Create('Description',
      'Description too short (minimum 2 characters)')
  else if ATrans.Category = '' then
    raise EValidationError.Create('Category', 'Category is required');
end;

end.

The Import Engine

unit BankImport.Engine;

{$mode objfpc}{$H+}

interface

uses
  SysUtils, BankImport.Types, BankImport.Exceptions,
  BankImport.Parser, BankImport.Validator;

function ImportBankFile(const AFileName: String): TImportResult;

implementation

{ Appends AMessage to the end of AResult.ErrorMessages, growing the
  array by one element. }
procedure AddError(var AResult: TImportResult; const AMessage: String);
begin
  SetLength(AResult.ErrorMessages, Length(AResult.ErrorMessages) + 1);
  AResult.ErrorMessages[High(AResult.ErrorMessages)] := AMessage;
end;

{ Returns True when ALine looks like a column-header row: its first
  column (the text before the first comma, or the whole line when there
  is no comma) contains the word "date" in any letter case. Only the
  first column is inspected so that a data row whose description merely
  contains "date" (for example "Software update") is not mistaken for a
  header and silently dropped. }
function IsHeaderLine(const ALine: String): Boolean;
var
  CommaPos: Integer;
  FirstField: String;
begin
  CommaPos := Pos(',', ALine);
  if CommaPos > 0 then
    FirstField := Copy(ALine, 1, CommaPos - 1)
  else
    FirstField := ALine;
  Result := Pos('date', LowerCase(FirstField)) > 0;
end;

{ Parses and validates one data line and records the outcome in AResult:
  SuccessCount is incremented on success; on a parse or validation error
  SkippedCount is incremented and a message is appended. Any other
  exception type propagates to the caller. }
procedure ImportLine(const ALine: String; ALineNumber: Integer;
  var AResult: TImportResult);
var
  Trans: TBankTransaction;
begin
  try
    Trans := ParseBankLine(ALine, ALineNumber);
    ValidateTransaction(Trans);
    { In a real application: store the transaction }
    Inc(AResult.SuccessCount);
  except
    on E: EParseError do
    begin
      { EParseError messages already carry the line number }
      Inc(AResult.SkippedCount);
      AddError(AResult, E.Message);
    end;
    on E: EValidationError do
    begin
      Inc(AResult.SkippedCount);
      AddError(AResult, Format('Line %d: %s', [ALineNumber, E.Message]));
    end;
  end;
end;

{ Imports a bank statement CSV file line by line.

  AFileName - path of the file to import

  Returns a TImportResult with the number of lines counted (a skipped
  header line is included in TotalLines but not in SkippedCount), the
  number imported, the number skipped, and one message per problem.

  Raises EFileAccessError when the file does not exist or cannot be
  opened. Malformed or invalid lines never abort the import; they are
  counted as skipped. A read error part-way through the file (for
  example a truncated export) stops reading, is recorded in
  ErrorMessages, and the results gathered so far are returned. The file
  is always closed before the function returns or raises. }
function ImportBankFile(const AFileName: String): TImportResult;
var
  F: TextFile;
  Line: String;
  IsFirstLine: Boolean;
begin
  Result.TotalLines := 0;
  Result.SuccessCount := 0;
  Result.SkippedCount := 0;
  SetLength(Result.ErrorMessages, 0);

  { Check file existence first — provide a clear message }
  if not FileExists(AFileName) then
    raise EFileAccessError.CreateFmt(
      'File not found: "%s". Check the path and try again.', [AFileName]);

  AssignFile(F, AFileName);
  try
    Reset(F);
  except
    on E: EInOutError do
      raise EFileAccessError.CreateFmt(
        'Cannot open file "%s": %s', [AFileName, E.Message]);
  end;

  try
    IsFirstLine := True;
    try
      while not EOF(F) do
      begin
        Inc(Result.TotalLines);
        ReadLn(F, Line);

        { Only the very first line may be a header }
        if IsFirstLine then
        begin
          IsFirstLine := False;
          if IsHeaderLine(Line) then
            Continue;
        end;

        ImportLine(Line, Result.TotalLines, Result);
      end;
    except
      { Covers the first line as well as later ones. Leaving the loop
        stops reading — the file may be truncated. }
      on E: EInOutError do
        AddError(Result, Format('Read error at line %d: %s',
          [Result.TotalLines, E.Message]));
    end;
  finally
    CloseFile(F);  { Always close the file }
  end;
end;

end.

Demonstration Program

program BankImportDemo;

{$mode objfpc}{$H+}

uses
  SysUtils, BankImport.Types, BankImport.Exceptions, BankImport.Engine;

{ Writes a human-readable import summary to standard output: the three
  counters, followed by a numbered list of error messages when there
  are any. }
procedure PrintImportResult(const AResult: TImportResult);
var
  ErrorCount, ErrNo: Integer;
begin
  WriteLn;
  WriteLn('=== Import Summary ===');
  WriteLn('Total lines:  ' + IntToStr(AResult.TotalLines));
  WriteLn('Imported:     ' + IntToStr(AResult.SuccessCount));
  WriteLn('Skipped:      ' + IntToStr(AResult.SkippedCount));

  ErrorCount := Length(AResult.ErrorMessages);
  if ErrorCount > 0 then
  begin
    WriteLn;
    WriteLn('Errors:');
    { Messages are numbered from 1 for display; the array is 0-based }
    for ErrNo := 1 to ErrorCount do
      WriteLn('  ' + IntToStr(ErrNo) + '. ' + AResult.ErrorMessages[ErrNo - 1]);
  end;
  WriteLn('======================');
end;

const
  StatementFile = 'bank_statement.csv';

{ Program entry point: imports the statement file and prints the summary.
  File-level failures and any other exception are reported on standard
  output instead of terminating the program with an unhandled error. }
begin
  WriteLn('PennyWise Bank Statement Import');
  WriteLn;

  try
    PrintImportResult(ImportBankFile(StatementFile));
  except
    { Handlers run from most specific to most general }
    on Err: EFileAccessError do
      WriteLn('File error: ', Err.Message);
    on Err: EImportError do
      WriteLn('Import error: ', Err.Message);
    on Err: Exception do
      WriteLn('Unexpected error: [', Err.ClassName, '] ', Err.Message);
  end;
end.

Design Analysis

Exception Handling at Three Levels

This pipeline handles errors at three distinct levels:

  1. File level (EFileAccessError): File not found or cannot be opened. These abort the entire import. There is no point processing lines if we cannot read the file.

  2. Line level (EParseError): A malformed line. These skip the individual line but continue processing the rest. One bad row should not invalidate 999 good rows.

  3. Validation level (EValidationError): A parseable but semantically invalid record. Same handling as parse errors — skip and continue.

This graduated approach — abort for catastrophic errors, skip-and-continue for recoverable ones — is the hallmark of robust data processing. It mirrors what real-world import tools do.

The Error Report

Instead of just printing errors to the console, the engine collects them in TImportResult.ErrorMessages. This allows the caller to decide how to present errors — in a console, in a GUI dialog, in a log file, or in an email notification. The engine produces data; the presentation layer presents it.

Key Takeaways

  1. Custom exceptions carry context. EParseError includes the line number and content. EValidationError includes the field name. This makes error messages actionable.
  2. try..finally guarantees file closure. No matter what happens during parsing, the file is closed.
  3. Graduated error handling. File-level errors are fatal; line-level errors are skipped. The pipeline adapts its response to the severity.
  4. Summary reporting. The caller gets structured results, not just printed messages. This supports testing, logging, and UI display.