Appendix I — SQL Cookbook
Ready-to-adapt recipes for common, recurring problems. All in PostgreSQL, against Mercado. Copy, adapt, understand.
Anti-join: "X with no related Y"
-- customers who never ordered (NULL-safe; preferred)
SELECT c.* FROM customers c
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.customer_id);
-- equivalents: LEFT JOIN ... WHERE o.order_id IS NULL ; or EXCEPT
Top-N per group
-- 2 most expensive products per category
SELECT * FROM (
SELECT name, category_id, price,
ROW_NUMBER() OVER (PARTITION BY category_id ORDER BY price DESC) AS rn
FROM products
) t WHERE rn <= 2;
Running total
SELECT order_id, order_date,
SUM(amount) OVER (ORDER BY order_date) AS running_revenue
FROM payments ORDER BY order_date;
Percent of total
SELECT name, price,
ROUND(100.0 * price / SUM(price) OVER (), 2) AS pct_of_catalog
FROM products;
Period-over-period (LAG)
WITH m AS (SELECT date_trunc('month', order_date) mo, SUM(amount) rev
FROM payments GROUP BY 1)
SELECT mo, rev, LAG(rev) OVER (ORDER BY mo) AS prev,
ROUND(100.0*(rev - LAG(rev) OVER (ORDER BY mo))/NULLIF(LAG(rev) OVER (ORDER BY mo),0),1) AS pct_change
FROM m ORDER BY mo;
Moving average
WITH d AS (SELECT order_date::date day, SUM(amount) rev FROM payments GROUP BY 1)
SELECT day, rev, ROUND(AVG(rev) OVER (ORDER BY day ROWS BETWEEN 2 PRECEDING AND CURRENT ROW),2) AS ma3
FROM d ORDER BY day;
Aggregate including the zeros (LEFT JOIN + COALESCE)
SELECT c.customer_id, c.last_name,
COUNT(DISTINCT o.order_id) AS orders,
COALESCE(SUM(oi.quantity*oi.unit_price),0) AS revenue
FROM customers c
LEFT JOIN orders o ON o.customer_id=c.customer_id
LEFT JOIN order_items oi ON oi.order_id=o.order_id
GROUP BY c.customer_id, c.last_name;
Date spine (every period, even empty ones)
SELECT g::date AS day, COALESCE(SUM(p.amount),0) AS rev
FROM generate_series('2024-01-01','2024-01-31','1 day') g
LEFT JOIN payments p ON p.paid_at::date = g::date
GROUP BY g ORDER BY g;
Conditional aggregation (FILTER)
SELECT
COUNT(*) FILTER (WHERE status='delivered') AS delivered,
COUNT(*) FILTER (WHERE status='cancelled') AS cancelled,
COUNT(*) AS total
FROM orders;
Upsert (insert or update)
INSERT INTO inventory (product_id, warehouse_id, quantity)
VALUES (1, 1, 50)
ON CONFLICT (product_id, warehouse_id) DO UPDATE SET quantity = EXCLUDED.quantity;
Safe division
SELECT delivered::numeric / NULLIF(total,0) AS rate FROM order_stats; -- NULL, not error, when total=0
Recursive: tree descendants
WITH RECURSIVE sub AS (
SELECT category_id, name FROM categories WHERE name='Electronics'
UNION ALL
SELECT c.category_id, c.name FROM categories c JOIN sub ON c.parent_category_id = sub.category_id
) SELECT * FROM sub;
Recursive: ancestor path
WITH RECURSIVE up AS (
SELECT category_id, name, parent_category_id FROM categories WHERE name='Laptops'
UNION ALL
SELECT c.category_id, c.name, c.parent_category_id FROM categories c JOIN up ON c.category_id = up.parent_category_id
) SELECT string_agg(name,' < ' ORDER BY category_id DESC) FROM up;
Deduplicate (keep one per group)
DELETE FROM t a USING t b
WHERE a.ctid < b.ctid AND a.dedup_key = b.dedup_key; -- keeps one row per dedup_key
Gaps and islands (consecutive runs) — sketch
-- number rows, subtract row_number from a sequence value; equal differences = same "island"
SELECT grp, MIN(d) AS start, MAX(d) AS end FROM (
SELECT d, d - (ROW_NUMBER() OVER (ORDER BY d))::int AS grp FROM daily_active
) s GROUP BY grp ORDER BY start;
Pivot (rows → columns) with FILTER
SELECT category_id,
SUM(price) FILTER (WHERE is_active) AS active_value,
SUM(price) FILTER (WHERE NOT is_active) AS inactive_value
FROM products GROUP BY category_id;
Keyset pagination (fast deep paging)
-- next page after (last_price, last_id) under ORDER BY price, product_id
SELECT * FROM products
WHERE (price, product_id) > (:last_price, :last_id)
ORDER BY price, product_id LIMIT 20;
JSONB query + index
CREATE INDEX ON products USING GIN (attributes);
SELECT name FROM products WHERE attributes @> '{"ram_gb": 16}';
Full-text search
SELECT name FROM products
WHERE to_tsvector('english', name || ' ' || coalesce(description,'')) @@ to_tsquery('english','wireless & headphone');
Average order value (avoid average-of-averages)
-- correct: every order weighted equally
WITH per_order AS (SELECT order_id, SUM(quantity*unit_price) tot FROM order_items GROUP BY order_id)
SELECT AVG(tot) AS aov FROM per_order;
See also: Appendix C (quick reference), and the chapters each recipe draws on (6, 7, 9, 11, 12, 13, 16).