SQL
SQL
Resources
things to study
COUNT(ID) vs COUNT(*), when NULL exists. link
Window functions. LEAD() link
window function ROWS and frame_clause
rows between 5 following and 5 preceding
rows between current row and 2 preceding
rows between 3 following and current row
{ RANGE | ROWS } BETWEEN frame_start AND frame_end
compare time: https://www.mysqltutorial.org/mysql-date-functions/
advanced subquery
pivot/unpivot
WITH RECURSIVE cte_name (column 1, column 2, ...)
as (
base_query
UNION ALL
cte_query
)
SELECT * FROM cte_name;
partition
full outer join
advanced join (equijion, cross join, outer join)
group by multiple columns
Count multiple columns: https://dba.stackexchange.com/questions/127564/how-to-use-count-with-multiple-columns
analyze a time series in sql. link
Notes
5 questions to think before writing a query:
- What tables do we need to use?
- How do we combine different tables?
- What fields do we need to return?
- What conditions do we have?
- Do we have to return a specific number of rows or in any kind of order?
Queries for financial analysis
- group data by year and quarter or any other levels (Months, weeks, days)
SELECT EXTRACT(YEAR FROM date) AS year,
EXTRACT(QUARTER FROM date) AS quarter,
COUNT(amount) AS number_of_transactions
FROM card_transaction
GROUP BY EXTRACT(YEAR FROM date), EXTRACT(QUARTER FROM date)
ORDER BY EXTRACT(YEAR FROM date) ASC, EXTRACT(QUARTER FROM date);
- Calculate Running Totals
SELECT DISTINCT (ct.date),
cty.card_type_name,
SUM (ct.amount) OVER (PARTITION BY cty.card_type_name ORDER BY ct.date ASC) AS transaction_running_total
FROM card_transaction ct
JOIN card_number cn ON ct.card_number_id = cn.id
JOIN card_type cty ON cn.card_type_id = cty.id
WHERE date > '2020-11-30' AND date <= '2020-12-31'
ORDER BY cty.card_type_name ASC;
- Calculate Running Averages
SELECT ct.date,
cty.card_type_name,
SUM(ct.amount) AS daily_sum,
AVG(SUM(ct.amount)) OVER (ORDER BY ct.date ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS transaction_running_average
FROM card_transaction ct
JOIN card_number cn ON ct.card_number_id = cn.id
JOIN card_type cty ON cn.card_type_id = cty.id
WHERE ct.date > '2020-11-30' AND date <= '2020-12-31'
AND cty.card_type_name = 'visa-electron'
GROUP BY ct.date, cty.card_type_name
ORDER BY cty.card_type_name;
- Time Series Analysis
SELECT ct.date,
SUM(ct.amount) AS daily_sum,
(SUM(ct.amount)-LAG(SUM(ct.amount)) OVER (ORDER BY ct.date ASC))
AS daily_difference,
co.country_name
FROM card_transaction ct
JOIN card_number cn ON ct.card_number_id = cn.id
JOIN customer cu ON cn.customer_id = cu.id
JOIN country co ON cu.country_id = co.id
WHERE ct.date > '2020-11-30' AND date <= '2020-12-31'
AND co.country_name = 'China'
GROUP BY ct.date, co.country_name;
- Add Multiple Grouping Levels
SELECT EXTRACT(YEAR FROM ct.date) AS year,
EXTRACT(QUARTER FROM ct.date) AS quarter,
COUNT(ct.amount) AS number_of_transactions,
cty.card_type_name
FROM card_transaction ct
JOIN card_number cn ON ct.card_number_id = cn.id
JOIN card_type cty ON cn.card_type_id = cty.id
GROUP BY ROLLUP(EXTRACT(YEAR FROM ct.date), EXTRACT(QUARTER FROM ct.date), cty.card_type_name);
- Create a Revenue Report on a Yearly Level
SELECT cu.NIN,
cu.first_name,
cu.last_name,
SUM(ct.amount) AS total_revenue_per_customer,
CASE
WHEN SUM(ct.amount) >= 1000000 THEN 'Platinum'
WHEN SUM(ct.amount) < 1000000 THEN 'Gold'
END AS customer_category,
SUM(CASE WHEN ct.date >= '2019-01-01' AND ct.date < '2020-01-01' THEN ct.amount ELSE 0 END) AS revenue_2019,
SUM(CASE WHEN ct.date >= '2020-01-01' AND ct.date < '2021-01-01' THEN ct.amount ELSE 0 END) AS revenue_2020
FROM card_transaction ct
JOIN card_number cn ON ct.card_number_id = cn.id
JOIN customer cu ON cn.customer_id = cu.id
GROUP BY cu.NIN, cu.first_name, cu.last_name
ORDER BY total_revenue_per_customer DESC;