Case Study 1: Robust File Processing Pipeline
The Scenario
Rosa Martinelli imports bank statements into PennyWise. Her bank exports CSV files, but the files are not always clean: some rows have missing fields, some have currency symbols in the amount column, some have dates in inconsistent formats, and occasionally the file is truncated (the bank's export process sometimes fails mid-file).
Rosa needs a file processing pipeline that:
1. Opens the file safely (handling "file not found" and permission errors).
2. Reads and parses each line (handling format errors per line without aborting the whole import).
3. Validates each parsed record (rejecting records with invalid data).
4. Reports a summary of what was imported and what was skipped.
5. Never leaves files open or data in an inconsistent state.
This case study builds such a pipeline using exception handling at every level.
The Implementation
Custom Exceptions
unit BankImport.Exceptions;
{$mode objfpc}{$H+}
interface
uses
SysUtils;
type
{ Root of the import exception hierarchy: every other exception class in this
  type section descends from it, so one "on E: EImportError" handler catches
  them all. }
EImportError = class(Exception);
{ Import error that adds no fields of its own. The import engine raises it when
  the source file does not exist or cannot be opened. }
EFileAccessError = class(EImportError);
{ Error for a single input line that could not be parsed. Keeps the line number
  and the raw line text so a caller can report or inspect the offending row. }
EParseError = class(EImportError)
private
{ Line number passed to the constructor. }
FLineNumber: Integer;
{ Raw text of the line passed to the constructor. }
FLineContent: String;
public
{ ALineNumber and ALineContent are stored in the read-only properties below;
  AMessage is the reason, which the constructor embeds in the exception
  message together with the line number and content. }
constructor Create(ALineNumber: Integer; const ALineContent, AMessage: String);
property LineNumber: Integer read FLineNumber;
property LineContent: String read FLineContent;
end;
{ Error for a record that was parsed but failed a validation rule. Keeps the
  name of the field that failed. }
EValidationError = class(EImportError)
private
{ Field name passed to the constructor. }
FFieldName: String;
public
{ AFieldName is stored in FieldName; AMessage is the reason, which the
  constructor embeds in the exception message after the field name. }
constructor Create(const AFieldName, AMessage: String);
property FieldName: String read FFieldName;
end;
implementation
{ Creates a parse error for one input line.
  ALineNumber  - number of the offending line, exposed via LineNumber.
  ALineContent - raw text of the line, exposed via LineContent.
  AMessage     - reason for the failure.
  The exception message combines all three values. }
constructor EParseError.Create(ALineNumber: Integer;
  const ALineContent, AMessage: String);
var
  FullMessage: String;
begin
  FLineNumber := ALineNumber;
  FLineContent := ALineContent;
  FullMessage := Format('Line %d: %s (content: "%s")',
    [ALineNumber, AMessage, ALineContent]);
  inherited Create(FullMessage);
end;
{ Creates a validation error for one field.
  AFieldName - name of the field that failed, exposed via FieldName.
  AMessage   - reason for the failure.
  The exception message names the field followed by the reason. }
constructor EValidationError.Create(const AFieldName, AMessage: String);
var
  FullMessage: String;
begin
  FFieldName := AFieldName;
  FullMessage := Format('Validation failed for %s: %s', [AFieldName, AMessage]);
  inherited Create(FullMessage);
end;
end.
The Import Record
unit BankImport.Types;
{$mode objfpc}{$H+}
interface
uses
SysUtils;
type
{ One bank-statement row after parsing. As filled in by ParseBankLine, the
  fields come from CSV columns 1 to 5 in the order declared here. }
TBankTransaction = record
{ Transaction date (first column). }
Date: TDateTime;
{ Free-text description (second column); the parser rejects an empty one. }
Description: String;
{ Transaction amount (third column), parsed after currency symbols are stripped. }
Amount: Currency;
{ Category label (fourth column). }
Category: String;
{ Optional reference (fifth column); empty string when the column is absent. }
Reference: String;
end;
{ Summary returned by an import run, as populated by ImportBankFile. }
TImportResult = record
{ Number of lines read from the file, including a skipped header line. }
TotalLines: Integer;
{ Number of lines that parsed and passed validation. }
SuccessCount: Integer;
{ Number of lines rejected by the parser or the validator. }
SkippedCount: Integer;
{ One human-readable message per rejected line or read error, in file order. }
ErrorMessages: array of String;
end;
implementation
end.
The Line Parser
unit BankImport.Parser;
{$mode objfpc}{$H+}
interface
uses
SysUtils, BankImport.Types, BankImport.Exceptions;
function ParseBankLine(const ALine: String; ALineNumber: Integer): TBankTransaction;
implementation
{ Returns S reduced to the characters that can appear in a plain decimal
  number: digits, '.' and '-'. Everything else (currency symbols, spaces,
  letters) is dropped; the surviving characters keep their original order.
  No check is made that the result is a well-formed number. }
function CleanAmount(const S: String): String;
const
  NumericChars = ['0'..'9', '.', '-'];
var
  Ch: Char;
begin
  Result := '';
  for Ch in S do
  begin
    if not (Ch in NumericChars) then
      Continue;
    Result := Result + Ch;
  end;
end;
{ Splits one CSV line into its fields.
  A comma inside a double-quoted section does not separate fields, the quote
  characters themselves are dropped, and a doubled quote inside a quoted
  section yields one literal quote character. An empty line yields a single
  empty field. }
function SplitCsvFields(const ALine: String): TStringArray;
var
  Idx, LineLen: Integer;
  InQuotes: Boolean;
  Field: String;
begin
  SetLength(Result, 0);
  Field := '';
  InQuotes := False;
  LineLen := Length(ALine);
  Idx := 1;
  { Run one position past the end so the last field is flushed by the same
    branch that handles separators. }
  while Idx <= LineLen + 1 do
  begin
    if (Idx > LineLen) or ((ALine[Idx] = ',') and not InQuotes) then
    begin
      SetLength(Result, Length(Result) + 1);
      Result[High(Result)] := Field;
      Field := '';
    end
    else if ALine[Idx] = '"' then
    begin
      if InQuotes and (Idx < LineLen) and (ALine[Idx + 1] = '"') then
      begin
        Field := Field + '"';
        Inc(Idx); { skip the second quote of the escaped pair }
      end
      else
        InQuotes := not InQuotes;
    end
    else
      Field := Field + ALine[Idx];
    Inc(Idx);
  end;
end;

{ Parses one CSV line of a bank statement into a TBankTransaction.

  Expected columns: date, description, amount, category and an optional
  reference. Fields may be double-quoted, so a quoted description can contain
  commas without shifting the following columns.

  ALine       - raw line text.
  ALineNumber - line number, used only for error reporting.

  Raises EParseError when the line is blank, has fewer than four columns, has
  an unparseable date or amount, or has an empty description. }
function ParseBankLine(const ALine: String; ALineNumber: Integer): TBankTransaction;
var
  Parts: TStringArray;
  DateText, AmountText: String;
  ParsedDate: TDateTime;
  ParsedAmount: Currency;
  AmountFormat: TFormatSettings;
begin
  if Trim(ALine) = '' then
    raise EParseError.Create(ALineNumber, ALine, 'Empty line');

  Parts := SplitCsvFields(ALine);
  if Length(Parts) < 4 then
    raise EParseError.Create(ALineNumber, ALine,
      Format('Expected 4+ columns, found %d', [Length(Parts)]));

  { Parse date. NOTE(review): the date is interpreted with the process-wide
    default date format, so the accepted layout depends on the locale. }
  DateText := Trim(Parts[0]);
  if not TryStrToDate(DateText, ParsedDate) then
    raise EParseError.Create(ALineNumber, ALine,
      'Invalid date format: "' + DateText + '"');
  Result.Date := ParsedDate;

  { Parse description }
  Result.Description := Trim(Parts[1]);
  if Result.Description = '' then
    raise EParseError.Create(ALineNumber, ALine, 'Empty description');

  { Parse amount. CleanAmount keeps only digits, '.' and '-', so the number is
    always written with '.' as the decimal point; force that separator instead
    of relying on the locale default. }
  AmountText := Trim(Parts[2]);
  AmountFormat := DefaultFormatSettings;
  AmountFormat.DecimalSeparator := '.';
  AmountFormat.ThousandSeparator := ',';
  if not TryStrToCurr(CleanAmount(AmountText), ParsedAmount, AmountFormat) then
    raise EParseError.Create(ALineNumber, ALine,
      'Invalid amount: "' + AmountText + '"');
  Result.Amount := ParsedAmount;

  { Parse category }
  Result.Category := Trim(Parts[3]);

  { Reference is optional }
  if Length(Parts) >= 5 then
    Result.Reference := Trim(Parts[4])
  else
    Result.Reference := '';
end;
end.
The Validator
unit BankImport.Validator;
{$mode objfpc}{$H+}
interface
uses
SysUtils, BankImport.Types, BankImport.Exceptions;
procedure ValidateTransaction(const ATrans: TBankTransaction);
implementation
{ Checks a parsed transaction against the import rules and raises
  EValidationError for the first rule that fails. Rules are checked in this
  order: non-zero amount, date not in the future, date not before 2000-01-01,
  description of at least two characters, non-empty category.
  Returns normally when every rule passes. }
procedure ValidateTransaction(const ATrans: TBankTransaction);

  { Raises the validation error for the given field and reason. }
  procedure Reject(const AField, AReason: String);
  begin
    raise EValidationError.Create(AField, AReason);
  end;

begin
  if ATrans.Amount = 0 then
    Reject('Amount', 'Amount cannot be zero');

  if ATrans.Date > Now then
    Reject('Date',
      'Transaction date is in the future: ' + DateToStr(ATrans.Date));

  if ATrans.Date < EncodeDate(2000, 1, 1) then
    Reject('Date',
      'Transaction date is before year 2000: ' + DateToStr(ATrans.Date));

  if Length(ATrans.Description) < 2 then
    Reject('Description', 'Description too short (minimum 2 characters)');

  if ATrans.Category = '' then
    Reject('Category', 'Category is required');
end;
end.
The Import Engine
unit BankImport.Engine;
{$mode objfpc}{$H+}
interface
uses
SysUtils, BankImport.Types, BankImport.Exceptions,
BankImport.Parser, BankImport.Validator;
function ImportBankFile(const AFileName: String): TImportResult;
implementation
{ Appends AMessage to the end of AResult.ErrorMessages, growing the array by
  one element. The other fields of AResult are left untouched. }
procedure AddError(var AResult: TImportResult; const AMessage: String);
begin
  SetLength(AResult.ErrorMessages, Length(AResult.ErrorMessages) + 1);
  AResult.ErrorMessages[High(AResult.ErrorMessages)] := AMessage;
end;
{$I+} { The EInOutError handlers below only fire when I/O checking is enabled. }

{ Decides whether the first line of the file is a column header.
  A line counts as a header when it contains the word "date" in any letter
  case and its first column contains no digits. The digit test keeps a data
  row whose description merely contains "date" (for example "Update") from
  being mistaken for a header and silently dropped. }
function IsHeaderLine(const ALine: String): Boolean;
var
  CommaPos, Idx: Integer;
  FirstField: String;
begin
  Result := False;
  if Pos('date', LowerCase(ALine)) = 0 then
    Exit;

  CommaPos := Pos(',', ALine);
  if CommaPos > 0 then
    FirstField := Copy(ALine, 1, CommaPos - 1)
  else
    FirstField := ALine;

  { A data row starts with a date, which always contains digits. }
  for Idx := 1 to Length(FirstField) do
    if FirstField[Idx] in ['0'..'9'] then
      Exit;

  Result := True;
end;

{ Parses and validates one data line and updates the counters in AResult.
  A parse or validation failure is recorded as a skipped line with a message;
  it never propagates to the caller. }
procedure ProcessLine(const ALine: String; ALineNum: Integer;
  var AResult: TImportResult);
var
  Trans: TBankTransaction;
begin
  try
    Trans := ParseBankLine(ALine, ALineNum);
    ValidateTransaction(Trans);
    { In a real application: store the transaction }
    Inc(AResult.SuccessCount);
  except
    on E: EParseError do
    begin
      Inc(AResult.SkippedCount);
      AddError(AResult, E.Message);
    end;
    on E: EValidationError do
    begin
      Inc(AResult.SkippedCount);
      AddError(AResult, Format('Line %d: %s', [ALineNum, E.Message]));
    end;
  end;
end;

{ Imports a bank-statement CSV file and returns a summary of the run.

  AFileName - path of the file to import.

  Returns a TImportResult whose TotalLines counts every line read (including
  a skipped header), SuccessCount the lines that parsed and validated,
  SkippedCount the rejected lines, and ErrorMessages one entry per rejected
  line or read error.

  Raises EFileAccessError when the file does not exist or cannot be opened.
  Malformed or invalid lines are skipped and reported, not raised. A read
  error part-way through the file is recorded and ends the import with the
  results gathered so far. The file is always closed before returning. }
function ImportBankFile(const AFileName: String): TImportResult;
var
  F: TextFile;
  Line: String;
begin
  Result.TotalLines := 0;
  Result.SuccessCount := 0;
  Result.SkippedCount := 0;
  SetLength(Result.ErrorMessages, 0);

  { Check file existence first — provide a clear message }
  if not FileExists(AFileName) then
    raise EFileAccessError.CreateFmt(
      'File not found: "%s". Check the path and try again.', [AFileName]);

  AssignFile(F, AFileName);
  try
    Reset(F);
  except
    on E: EInOutError do
      raise EFileAccessError.CreateFmt(
        'Cannot open file "%s": %s', [AFileName, E.Message]);
  end;

  try
    while not EOF(F) do
    begin
      Inc(Result.TotalLines);
      try
        ReadLn(F, Line);
      except
        on E: EInOutError do
        begin
          AddError(Result, Format('Read error at line %d: %s',
            [Result.TotalLines, E.Message]));
          Break; { Stop reading — the file may be truncated }
        end;
      end;

      { Only the very first line can be a header; it is counted but not parsed. }
      if (Result.TotalLines = 1) and IsHeaderLine(Line) then
        Continue;

      ProcessLine(Line, Result.TotalLines, Result);
    end;
  finally
    CloseFile(F); { Always close the file }
  end;
end;
end.
Demonstration Program
program BankImportDemo;
{$mode objfpc}{$H+}
uses
SysUtils, BankImport.Types, BankImport.Exceptions, BankImport.Engine;
{ Writes a human-readable summary of an import run to standard output: the
  three counters first, then a numbered list of error messages when there are
  any, then a closing rule. }
procedure PrintImportResult(const AResult: TImportResult);
var
  ErrorText: String;
  Ordinal: Integer;
begin
  WriteLn;
  WriteLn('=== Import Summary ===');
  WriteLn(Format('Total lines: %d', [AResult.TotalLines]));
  WriteLn(Format('Imported: %d', [AResult.SuccessCount]));
  WriteLn(Format('Skipped: %d', [AResult.SkippedCount]));

  if Length(AResult.ErrorMessages) > 0 then
  begin
    WriteLn;
    WriteLn('Errors:');
    { Messages are numbered from 1 in the order they were recorded. }
    Ordinal := 0;
    for ErrorText in AResult.ErrorMessages do
    begin
      Inc(Ordinal);
      WriteLn(Format(' %d. %s', [Ordinal, ErrorText]));
    end;
  end;

  WriteLn('======================');
end;
var
  Summary: TImportResult;

{ Program entry point: imports bank_statement.csv from the current directory
  and prints the summary. Handlers are ordered from most specific to most
  general so each failure gets the most precise prefix available. }
begin
  WriteLn('PennyWise Bank Statement Import');
  WriteLn;
  try
    Summary := ImportBankFile('bank_statement.csv');
    PrintImportResult(Summary);
  except
    on E: EFileAccessError do
      WriteLn('File error: ' + E.Message);
    on E: EImportError do
      WriteLn('Import error: ' + E.Message);
    on E: Exception do
      WriteLn('Unexpected error: [' + E.ClassName + '] ' + E.Message);
  end;
end.
Design Analysis
Exception Handling at Three Levels
This pipeline handles errors at three distinct levels:
- File level (EFileAccessError): File not found or cannot be opened. These abort the entire import. There is no point processing lines if we cannot read the file.
- Line level (EParseError): A malformed line. These skip the individual line but continue processing the rest. One bad row should not invalidate 999 good rows.
- Validation level (EValidationError): A parseable but semantically invalid record. Same handling as parse errors — skip and continue.
This graduated approach — abort for catastrophic errors, skip-and-continue for recoverable ones — is the hallmark of robust data processing. It mirrors what real-world import tools do.
The Error Report
Instead of just printing errors to the console, the engine collects them in TImportResult.ErrorMessages. This allows the caller to decide how to present errors — in a console, in a GUI dialog, in a log file, or in an email notification. The engine produces data; the presentation layer presents it.
Key Takeaways
- Custom exceptions carry context. EParseError includes the line number and content. EValidationError includes the field name. This makes error messages actionable.
- try..finally guarantees file closure. No matter what happens during parsing, the file is closed.
- Graduated error handling. File-level errors are fatal; line-level errors are skipped. The pipeline adapts its response to the severity.
- Summary reporting. The caller gets structured results, not just printed messages. This supports testing, logging, and UI display.