Case Study 1: Building a Dictionary with BST — A Spell Checker Using Binary Search Trees

The Problem

We want to build a spell checker that loads a dictionary of English words and checks whether each word in a text document is correctly spelled. The core operations are:

  1. Insert a word into the dictionary (loading phase).
  2. Search for a word (checking phase).
  3. List all words alphabetically (display phase).

A BST is ideal for this: insert is O(log n) for balanced trees, search is O(log n), and in-order traversal produces alphabetical output.

Implementation

program SpellChecker;

uses
  SysUtils;

type
  PWordNode = ^TWordNode;
  TWordNode = record
    Word: string[40];
    Left, Right: PWordNode;
  end;

var
  DictRoot: PWordNode;
  WordCount: Integer;

{ Insert a word into the BST }
procedure InsertWord(var Node: PWordNode; const W: string);
begin
  if Node = nil then begin
    New(Node);
    Node^.Word := LowerCase(W);
    Node^.Left := nil;
    Node^.Right := nil;
    Inc(WordCount);
  end
  else if LowerCase(W) < Node^.Word then
    InsertWord(Node^.Left, LowerCase(W))
  else if LowerCase(W) > Node^.Word then
    InsertWord(Node^.Right, LowerCase(W));
  { Equal: word already in dictionary, skip }
end;

{ Search for a word }
function FindWord(Node: PWordNode; const W: string): Boolean;
var
  LW: string;
begin
  LW := LowerCase(W);
  if Node = nil then
    FindWord := False
  else if LW = Node^.Word then
    FindWord := True
  else if LW < Node^.Word then
    FindWord := FindWord(Node^.Left, LW)
  else
    FindWord := FindWord(Node^.Right, LW);
end;

{ In-order traversal: print all words alphabetically }
procedure PrintDictionary(Node: PWordNode);
begin
  if Node <> nil then begin
    PrintDictionary(Node^.Left);
    WriteLn('  ', Node^.Word);
    PrintDictionary(Node^.Right);
  end;
end;

{ Count tree height }
function TreeHeight(Node: PWordNode): Integer;
var
  LH, RH: Integer;
begin
  if Node = nil then
    TreeHeight := 0
  else begin
    LH := TreeHeight(Node^.Left);
    RH := TreeHeight(Node^.Right);
    if LH > RH then TreeHeight := 1 + LH
    else TreeHeight := 1 + RH;
  end;
end;

{ Free tree }
procedure FreeTree(var Node: PWordNode);
begin
  if Node <> nil then begin
    FreeTree(Node^.Left);
    FreeTree(Node^.Right);
    Dispose(Node);
    Node := nil;
  end;
end;

{ Build a sample dictionary }
procedure BuildDictionary;
const
  Words: array[1..20] of string = (
    'the', 'quick', 'brown', 'fox', 'jumps',
    'over', 'lazy', 'dog', 'and', 'cat',
    'hello', 'world', 'pascal', 'program',
    'algorithm', 'tree', 'graph', 'sort',
    'search', 'recursion'
  );
var
  I: Integer;
begin
  for I := 1 to 20 do
    InsertWord(DictRoot, Words[I]);
end;

{ Check spelling of a text }
procedure CheckSpelling(const Text: string);
var
  Words: array[1..100] of string;
  WCount, I, J: Integer;
  Current: string;
begin
  WCount := 0;
  Current := '';

  { Simple word tokenizer }
  for I := 1 to Length(Text) do begin
    if Text[I] in ['a'..'z', 'A'..'Z', ''''] then
      Current := Current + Text[I]
    else if Current <> '' then begin
      Inc(WCount);
      Words[WCount] := Current;
      Current := '';
    end;
  end;
  if Current <> '' then begin
    Inc(WCount);
    Words[WCount] := Current;
  end;

  { Check each word }
  WriteLn('Checking ', WCount, ' words...');
  for I := 1 to WCount do begin
    if not FindWord(DictRoot, Words[I]) then
      WriteLn('  MISSPELLED: "', Words[I], '"')
    else
      WriteLn('  OK: "', Words[I], '"');
  end;
end;

begin
  WriteLn('=== Spell Checker with BST Dictionary ===');
  WriteLn;

  DictRoot := nil;
  WordCount := 0;

  BuildDictionary;
  WriteLn('Dictionary loaded: ', WordCount, ' words');
  WriteLn('Tree height: ', TreeHeight(DictRoot));
  WriteLn;

  WriteLn('--- Dictionary (alphabetical) ---');
  PrintDictionary(DictRoot);
  WriteLn;

  WriteLn('--- Spell Check ---');
  CheckSpelling('The quick brown fox jumps over the lazy xyzzy');
  WriteLn;

  WriteLn('--- Another Check ---');
  CheckSpelling('Pascal program and algorithm are cool recursion works');
  WriteLn;

  FreeTree(DictRoot);
  WriteLn('Dictionary freed.');
end.

Performance Analysis

With 20 words, the BST height is approximately 5 (log₂(20) ≈ 4.3). Each word lookup takes at most 5 comparisons. If we scaled to a full English dictionary (170,000 words), a balanced BST would have height approximately 18 — meaning any word can be checked in 18 or fewer string comparisons.

A hash table could achieve O(1) average lookup, but the BST offers a significant advantage: in-order traversal produces sorted output for free. This is valuable for features like "did you mean?" suggestions (nearby words in alphabetical order).

Discussion Questions

  1. What would happen to lookup performance if the dictionary words were loaded in alphabetical order?
  2. How would you implement "did you mean?" suggestions for misspelled words?
  3. Compare this BST dictionary to a sorted array with binary search. What are the tradeoffs?
  4. A real spell checker needs to handle 170,000+ words. Would this BST implementation be fast enough?