Appendix I — SQL Cookbook

Ready-to-adapt recipes for common, recurring problems. All in PostgreSQL, against Mercado. Copy, adapt, understand.

-- customers who never ordered (NULL-safe; preferred)
SELECT c.* FROM customers c
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.customer_id);
-- equivalents: LEFT JOIN ... WHERE o.order_id IS NULL ; or  EXCEPT

Top-N per group

-- 2 most expensive products per category
SELECT * FROM (
  SELECT name, category_id, price,
         ROW_NUMBER() OVER (PARTITION BY category_id ORDER BY price DESC) AS rn
  FROM products
) t WHERE rn <= 2;

Running total

SELECT order_id, order_date,
       SUM(amount) OVER (ORDER BY order_date) AS running_revenue
FROM payments ORDER BY order_date;

Percent of total

SELECT name, price,
       ROUND(100.0 * price / SUM(price) OVER (), 2) AS pct_of_catalog
FROM products;

Period-over-period (LAG)

WITH m AS (SELECT date_trunc('month', order_date) mo, SUM(amount) rev
           FROM payments GROUP BY 1)
SELECT mo, rev, LAG(rev) OVER (ORDER BY mo) AS prev,
       ROUND(100.0*(rev - LAG(rev) OVER (ORDER BY mo))/NULLIF(LAG(rev) OVER (ORDER BY mo),0),1) AS pct_change
FROM m ORDER BY mo;

Moving average

WITH d AS (SELECT order_date::date day, SUM(amount) rev FROM payments GROUP BY 1)
SELECT day, rev, ROUND(AVG(rev) OVER (ORDER BY day ROWS BETWEEN 2 PRECEDING AND CURRENT ROW),2) AS ma3
FROM d ORDER BY day;

Aggregate including the zeros (LEFT JOIN + COALESCE)

SELECT c.customer_id, c.last_name,
       COUNT(DISTINCT o.order_id) AS orders,
       COALESCE(SUM(oi.quantity*oi.unit_price),0) AS revenue
FROM customers c
LEFT JOIN orders o ON o.customer_id=c.customer_id
LEFT JOIN order_items oi ON oi.order_id=o.order_id
GROUP BY c.customer_id, c.last_name;

Date spine (every period, even empty ones)

SELECT g::date AS day, COALESCE(SUM(p.amount),0) AS rev
FROM generate_series('2024-01-01','2024-01-31','1 day') g
LEFT JOIN payments p ON p.paid_at::date = g::date
GROUP BY g ORDER BY g;

Conditional aggregation (FILTER)

SELECT
  COUNT(*) FILTER (WHERE status='delivered') AS delivered,
  COUNT(*) FILTER (WHERE status='cancelled') AS cancelled,
  COUNT(*) AS total
FROM orders;

Upsert (insert or update)

INSERT INTO inventory (product_id, warehouse_id, quantity)
VALUES (1, 1, 50)
ON CONFLICT (product_id, warehouse_id) DO UPDATE SET quantity = EXCLUDED.quantity;

Safe division

SELECT delivered::numeric / NULLIF(total,0) AS rate FROM order_stats;   -- NULL, not error, when total=0

Recursive: tree descendants

WITH RECURSIVE sub AS (
  SELECT category_id, name FROM categories WHERE name='Electronics'
  UNION ALL
  SELECT c.category_id, c.name FROM categories c JOIN sub ON c.parent_category_id = sub.category_id
) SELECT * FROM sub;

Recursive: ancestor path

WITH RECURSIVE up AS (
  SELECT category_id, name, parent_category_id FROM categories WHERE name='Laptops'
  UNION ALL
  SELECT c.category_id, c.name, c.parent_category_id FROM categories c JOIN up ON c.category_id = up.parent_category_id
) SELECT string_agg(name,' < ' ORDER BY category_id DESC) FROM up;

Deduplicate (keep one per group)

DELETE FROM t a USING t b
WHERE a.ctid < b.ctid AND a.dedup_key = b.dedup_key;   -- keeps one row per dedup_key

Gaps and islands (consecutive runs) — sketch

-- number rows, subtract row_number from a sequence value; equal differences = same "island"
SELECT grp, MIN(d) AS start, MAX(d) AS end FROM (
  SELECT d, d - (ROW_NUMBER() OVER (ORDER BY d))::int AS grp FROM daily_active
) s GROUP BY grp ORDER BY start;

Pivot (rows → columns) with FILTER

SELECT category_id,
  SUM(price) FILTER (WHERE is_active) AS active_value,
  SUM(price) FILTER (WHERE NOT is_active) AS inactive_value
FROM products GROUP BY category_id;

Keyset pagination (fast deep paging)

-- next page after (last_price, last_id) under ORDER BY price, product_id
SELECT * FROM products
WHERE (price, product_id) > (:last_price, :last_id)
ORDER BY price, product_id LIMIT 20;

JSONB query + index

CREATE INDEX ON products USING GIN (attributes);
SELECT name FROM products WHERE attributes @> '{"ram_gb": 16}';
SELECT name FROM products
WHERE to_tsvector('english', name || ' ' || coalesce(description,'')) @@ to_tsquery('english','wireless & headphone');

Average order value (avoid average-of-averages)

-- correct: every order weighted equally
WITH per_order AS (SELECT order_id, SUM(quantity*unit_price) tot FROM order_items GROUP BY order_id)
SELECT AVG(tot) AS aov FROM per_order;

See also: Appendix C (quick reference), and the chapters each recipe draws on (6, 7, 9, 11, 12, 13, 16).