353 lines
13 KiB
Python
353 lines
13 KiB
Python
|
|
"""
|
||
|
|
AMEX Statement Analyser — categorisation, insights, monthly/weekly breakdown.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import re
|
||
|
|
from collections import defaultdict
|
||
|
|
from datetime import datetime
|
||
|
|
from amex_parser import Transaction
|
||
|
|
|
||
|
|
# ── Payment processor prefixes to strip before categorising / displaying ─────
# WINDCAVE* is a common NZ payment gateway used by many merchants
_NOISE_PREFIX_RE = re.compile(
    r'^(WINDCAVE\*|SQ \*|SP |PAYPAL \*|PAYPAL\*)',
    re.IGNORECASE,
)


def normalise(description: str) -> str:
    """Strip payment processor prefixes for cleaner display and matching."""
    stripped = _NOISE_PREFIX_RE.sub('', description)
    return stripped.strip()
|
||
|
|
|
||
|
|
|
||
|
|
# ── Category rules (checked in order — first match wins) ─────────────────────
# IMPORTANT: Fuel is before Public Transport so petrol stations don't fall into Public Transport.
# Matching is case-insensitive substring matching (see categorise()); trailing
# spaces on short keywords ('BP ', 'GULL ', 'NPD ') presumably guard against
# matching those letters inside longer merchant names — confirm before removing.
CATEGORY_RULES: list[tuple[str, list[str]]] = [
    # Card payments to the account — separated out so they never count as spend.
    ('Payments', [
        'PAYMENT - THANK YOU',
    ]),
    ('Groceries', [
        'NEW WORLD', 'COUNTDOWN', 'WOOLWORTHS', 'PAK N SAVE', 'PAKNSAVE',
        'FOUR SQUARE', 'FRESH CHOICE', 'HARRIS FARM', 'MOORE WILSON',
        'FARRO', 'NOSH', 'HUCKLEBERRY', 'COMMONSENSE',
    ]),
    ('Dining & Takeaway', [
        'MCDONALD', 'UBER EATS', 'ST PIERRE', 'KFC', 'UMU PIZZA',
        'DOMINO', 'WILDFLOUR', 'SUBWAY', 'BURGER KING', 'PIZZA HUT',
        'NOODLE', 'SUSHI', 'RESTAURANT', 'CAFE', 'BAKERY', 'COFFEE',
        'STARBUCKS', 'MUFFIN BREAK', 'THAI', 'INDIAN', 'CHINESE',
        'KEBAB', 'TAKEAWAY', 'PITA PIT', 'OPORTO', "WENDY'S", 'GRILL',
        'BISTRO', 'BARBEQUE', 'BBQ', 'EGGS', 'PANCAKE',
    ]),
    ('Fuel', [
        # NOTE(review): 'NIGHT N DAY' looks like a convenience-store chain —
        # confirm Fuel is the intended bucket for it.
        'MOBIL', 'BP ', 'Z ENERGY', 'CALTEX', 'GULL ', 'CHALLENGE ',
        'NPD ', 'WAITOMO ', 'NIGHT N DAY',
    ]),
    ('Public Transport', [
        'AT HOP', 'AT METRO', 'AUCKLAND TRANSPORT', 'AUCKLAND COUNCIL TR',
        'PUBLIC TRANSPORT', 'INTERCITY', 'NAKED BUS', 'MANA COACH',
        'TRANZ METRO', 'RITCHIES', 'GO BUS', 'NZ BUS',
        'SNAPPER', 'METLINK', 'EBUS', 'ORBITER',
        'UBER TRIP', 'UBER*TRIP',
    ]),
    ('Utilities', [
        'SPARK', 'SLINGSHOT', 'POWERSHOP', 'CONTACT ENERGY', 'MERCURY ',
        'GENESIS ', 'WATERCARE', 'CHORUS', 'VODAFONE', 'TWO DEGREES',
        '2DEGREES', 'SKINNY', 'TRUSTPOWER', 'ORCON',
    ]),
    ('Subscriptions', [
        'GOOGLE', 'APPLE.COM', 'EMBY', 'MYOB', 'NETFLIX', 'SPOTIFY',
        'MICROSOFT', 'ADOBE', 'DROPBOX', 'AMAZON PRIME', 'DISNEY',
        'SKYDRIVE', 'YOUTUBE', 'ICLOUD', 'GITHUB', 'CANVA', 'XERO',
        'ZOOM', 'SLACK', 'ATLASSIAN', '1PASSWORD', 'LASTPASS', 'NORDVPN',
        'PARAMOUNT', 'BINGE', 'NEON ', 'LIGHTROOM',
    ]),
    ('Health & Pharmacy', [
        'CHEMIST WAREHOUSE', 'PHARMACY', 'UNICHEM', 'LIFE PHARMACY',
        'GREEN CROSS', 'DOCTOR', 'MEDICAL', 'DENTAL', 'DENTIST',
        'OPTOMETRIST', 'PHYSIO', 'HEALTHZONE', 'HEALTH FOOD',
        'SPECSAVERS', 'VISION',
    ]),
    ('Home & Hardware', [
        'BUNNINGS', 'MITRE 10', 'PLACEMAKERS', 'WISELIVING',
        'IKEA', 'FREEDOM ', 'SPOTLIGHT', 'BED BATH', 'ADAIRS',
        'BRISCOES', 'STEVENS ', 'LIVING AND GIVING', 'KMART HOMEWARE',
        'BABY FACTORY', 'BABY CITY',
    ]),
    # Must precede 'Shopping & Apparel' conceptually, but note 'KMART HOMEWARE'
    # above also precedes the generic 'KMART' keyword below — order matters.
    ('Electronics & Appliances', [
        'HARVEY NORMAN', 'NOEL LEEMING', 'JB HI-FI', 'THE GOOD GUYS',
        'PB TECH', 'COMPUTER LOUNGE', 'MIGHTY APE', 'PLAYTECH',
    ]),
    ('Shopping & Apparel', [
        'KMART', 'THE WAREHOUSE', 'WAREHOUSE STATIONERY', 'FARMERS ',
        'COTTON ON', 'GLASSONS', 'HALLENSTEINS', 'KATHMANDU', 'REBEL ',
        'FOOTLOCKER', 'POSTIE', 'STIRLING SPORTS', 'LULULEMON', 'EZIBUY',
        'WHITCOULLS', 'PAPER PLUS', 'SMITHS CITY', 'HANNAHS',
        'NUMBER ONE SHOES', 'SHOE WAREHOUSE',
    ]),
    ('Entertainment', [
        'HOYTS', 'READING CINEMA', 'EVENT CINEMA', 'TICKETEK',
        'TICKETMASTER', 'EVENTFINDA', 'SKY TV', 'TIMEZONE',
        'LASER STRIKE', 'ARCHIE BROTHERS', 'BOWLING', 'PAINTBALL',
    ]),
    ('Travel & Accommodation', [
        'AIRBNB', 'BOOKING.COM', 'HOTEL', 'MOTEL', 'HOSTEL',
        'AIR NEW ZEALAND', 'JETSTAR', 'QANTAS', 'VIRGIN AUSTRALIA',
        'SCENIC HOTEL', 'HOLIDAY INN', 'NOVOTEL', 'IBIS ',
        'TRIVAGO', 'EXPEDIA',
    ]),
    ('Personal Care', [
        # Leading spaces (' SALON', ' NAIL ') presumably avoid matching inside
        # longer words like 'EPSALON' — TODO confirm intent.
        'HAIRCUT', 'BARBER', ' SALON', 'DAY SPA', 'BEAUTY', 'LASER ',
        ' NAIL ', 'WAXING', 'MASSAGE',
    ]),
    ('Food & Specialty', [
        'SABATO', 'TANK ', 'ORIGIN COFFEE', 'ATOMIC COFFEE',
    ]),
    ('Pets', [
        'VET ', 'VETCARE', 'PETBARN', 'PET STOCK', 'ANIMATES',
        'HOLLYWOOD FISH', 'PETCO',
    ]),
]
|
||
|
|
|
||
|
|
# Emoji shown next to each category name in the report.  Includes an entry for
# every category in CATEGORY_RULES plus 'Other', the catch-all returned by
# categorise() when no rule matches.
CATEGORY_ICONS = {
    'Groceries': '🛒',
    'Dining & Takeaway': '🍔',
    'Fuel': '⛽',
    'Public Transport': '🚌',
    'Utilities': '⚡',
    'Subscriptions': '📱',
    'Health & Pharmacy': '💊',
    'Home & Hardware': '🏠',
    'Electronics & Appliances': '💻',
    'Shopping & Apparel': '🛍️',
    'Entertainment': '🎬',
    'Travel & Accommodation': '✈️',
    'Personal Care': '💅',
    'Food & Specialty': '🍷',
    'Pets': '🐾',
    'Other': '📦',
    'Payments': '💳',
}
|
||
|
|
|
||
|
|
|
||
|
|
def categorise(description: str) -> str:
    """Return the first matching category for *description*, else 'Other'."""
    # Normalise first so WINDCAVE*DOMINOS → DOMINOS → Dining & Takeaway
    haystack = normalise(description).upper()
    for category, keywords in CATEGORY_RULES:
        if any(kw.upper() in haystack for kw in keywords):
            return category
    return 'Other'
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_date(date_str: str) -> datetime | None:
|
||
|
|
try:
|
||
|
|
return datetime.strptime(date_str, '%d.%m.%y')
|
||
|
|
except ValueError:
|
||
|
|
return None
|
||
|
|
|
||
|
|
|
||
|
|
def _clean_merchant(description: str) -> str:
    """Normalised merchant name for grouping (strip noise prefix + location suffix)."""
    name = normalise(description)
    # Strip trailing store-number IDs like "9518", "8212"
    name = re.sub(r'\s+\d{3,6}$', '', name).strip()

    # Peel trailing ALL-CAPS location tokens one at a time, but only while the
    # remainder still matches some category keyword — so the brand itself
    # (usually also upper-case) is never stripped away.
    keywords = [kw.upper() for _, kws in CATEGORY_RULES for kw in kws]
    tokens = name.split()
    while len(tokens) > 1 and tokens[-1].isupper() and len(tokens[-1]) <= 15:
        remainder = ' '.join(tokens[:-1]).upper()
        if any(kw in remainder for kw in keywords):
            tokens = tokens[:-1]
        else:
            break
    return ' '.join(tokens)
|
||
|
|
|
||
|
|
|
||
|
|
def _short_merchant(description: str) -> str:
    """Short display name for insights."""
    name = normalise(description)
    # Keep only the part before any trailing upper-case URL
    name, _, _ = name.partition('HTTPS://')
    name = name.strip()
    # Drop *REFERENCE-style tokens appended by some processors
    name = re.sub(r'\*\w+', '', name).strip()
    return name.title()
|
||
|
|
|
||
|
|
|
||
|
|
def monthly_breakdown(enriched: list[dict]) -> dict:
    """Aggregate debit spend per calendar month, split by category.

    Returns a dict with 'months' (chronological labels like 'Jan 2024'),
    'categories' (ordered by descending overall spend), 'by_month'
    (month → category → amount) and 'totals' (month → total).
    """
    per_month: dict[str, dict[str, float]] = defaultdict(lambda: defaultdict(float))
    sort_key: dict[str, str] = {}

    for tx in enriched:
        if tx['is_credit']:
            continue
        when = _parse_date(tx['date'])
        if when is None:
            continue
        label = when.strftime('%b %Y')
        sort_key[label] = when.strftime('%Y-%m')  # ISO prefix sorts chronologically
        per_month[label][tx['category']] += tx['amount']

    months = sorted(per_month, key=sort_key.__getitem__)

    def overall(cat: str) -> float:
        # Total for *cat* across all months — drives the category ordering.
        return sum(per_month[m].get(cat, 0) for m in months)

    categories = sorted({c for m in months for c in per_month[m]},
                        key=overall, reverse=True)

    return {
        'months': months,
        'categories': categories,
        'by_month': {m: {c: round(per_month[m].get(c, 0), 2) for c in categories}
                     for m in months},
        'totals': {m: round(sum(per_month[m].values()), 2) for m in months},
    }
|
||
|
|
|
||
|
|
|
||
|
|
def weekly_breakdown(enriched: list[dict]) -> dict:
    """Total debit spend bucketed by week-of-month (days 1-7 → 'Week 1', …)."""
    totals: dict[str, float] = defaultdict(float)
    for tx in enriched:
        if tx['is_credit']:
            continue
        when = _parse_date(tx['date'])
        if when is None:
            continue
        bucket = f'Week {(when.day - 1) // 7 + 1}'
        totals[bucket] += tx['amount']
    ordered = sorted(totals)
    return {'weeks': ordered, 'totals': {w: round(totals[w], 2) for w in ordered}}
|
||
|
|
|
||
|
|
|
||
|
|
def _merchant_names(txns: list[dict]) -> list[str]:
    """Unique merchant display names for *txns*, preserving first-seen order."""
    return list(dict.fromkeys(_short_merchant(t['description']) for t in txns))


def generate_insights(enriched: list[dict], by_category: dict, total_spend: float) -> list[dict]:
    """Build headline insight cards for the report.

    Args:
        enriched: transaction dicts with 'category', 'amount', 'is_credit',
            'date' and 'description' keys (as produced by analyse()).
        by_category: spend totals per category, used for the card totals.
        total_spend: overall debit total; drives percentage / daily figures.

    Returns:
        A list of card dicts, each with 'icon', 'title', 'stat', 'detail'
        and 'color' keys. Cards for empty categories are omitted.
    """
    spend_txns = [t for t in enriched if not t['is_credit']]
    insights: list[dict] = []

    # Grocery trips — trip count, average basket, and share of total spend
    grocery_txns = [t for t in spend_txns if t['category'] == 'Groceries']
    if grocery_txns:
        total = by_category.get('Groceries', 0)
        count = len(grocery_txns)
        avg = total / count if count else 0
        pct = (total / total_spend * 100) if total_spend else 0
        insights.append({
            'icon': '🛒', 'title': 'Grocery Shopping',
            'stat': f'${total:.2f}',
            'detail': f'{count} trips · avg ${avg:.0f} each · {pct:.0f}% of spend',
            'color': '#27ae60',
        })

    # Dining & takeaway — just the order count
    dining_txns = [t for t in spend_txns if t['category'] == 'Dining & Takeaway']
    if dining_txns:
        total = by_category.get('Dining & Takeaway', 0)
        count = len(dining_txns)
        insights.append({
            'icon': '🍔', 'title': 'Dining & Takeaway',
            'stat': f'${total:.2f}',
            'detail': f'{count} orders this period',
            'color': '#e67e22',
        })

    # Subscriptions — list up to five distinct services
    sub_txns = [t for t in spend_txns if t['category'] == 'Subscriptions']
    if sub_txns:
        total = by_category.get('Subscriptions', 0)
        names = _merchant_names(sub_txns)
        insights.append({
            'icon': '📱', 'title': 'Subscriptions',
            'stat': f'${total:.2f}',
            'detail': ', '.join(names[:5]),
            'color': '#8e44ad',
        })

    # Utilities — list up to four distinct providers
    util_txns = [t for t in spend_txns if t['category'] == 'Utilities']
    if util_txns:
        total = by_category.get('Utilities', 0)
        names = _merchant_names(util_txns)
        insights.append({
            'icon': '⚡', 'title': 'Utilities',
            'stat': f'${total:.2f}',
            'detail': ', '.join(names[:4]),
            'color': '#f39c12',
        })

    # Biggest single purchase
    if spend_txns:
        biggest = max(spend_txns, key=lambda t: t['amount'])
        insights.append({
            'icon': '💳', 'title': 'Largest Purchase',
            'stat': f'${biggest["amount"]:.2f}',
            'detail': _short_merchant(biggest['description']),
            'color': '#e74c3c',
        })

    # Daily average across days that had at least one debit
    active_days = len({t['date'] for t in spend_txns})
    if active_days > 0 and total_spend > 0:
        insights.append({
            'icon': '📅', 'title': 'Daily Average',
            'stat': f'${total_spend / active_days:.2f}',
            'detail': f'Across {active_days} active spending days',
            'color': '#006fcf',
        })

    return insights
|
||
|
|
|
||
|
|
|
||
|
|
def analyse(enriched_transactions: list[dict] | None = None,
            transactions: list[Transaction] | None = None) -> dict:
    """Accept either pre-enriched dicts or raw Transaction objects.

    When only *transactions* is given, each one is normalised and
    categorised first; a supplied *enriched_transactions* list is used
    as-is (and is mutated to add stable 'id' keys).
    """
    if enriched_transactions is None:
        enriched_transactions = [
            {
                'date': tx.date,
                'description': normalise(tx.description),
                'amount': tx.amount,
                'is_credit': tx.is_credit,
                'category': categorise(tx.description),
            }
            for tx in (transactions or [])
        ]

    spend_txns = [t for t in enriched_transactions if not t['is_credit']]
    credit_txns = [t for t in enriched_transactions if t['is_credit']]
    payment_txns = [t for t in credit_txns if t['category'] == 'Payments']

    total_spend = round(sum(t['amount'] for t in spend_txns), 2)
    total_credits = round(sum(t['amount'] for t in credit_txns), 2)
    total_payments = round(sum(t['amount'] for t in payment_txns), 2)

    # Spend per category, largest first
    category_totals: dict[str, float] = defaultdict(float)
    for t in spend_txns:
        category_totals[t['category']] += t['amount']
    by_category = {
        cat: round(amount, 2)
        for cat, amount in sorted(category_totals.items(), key=lambda kv: -kv[1])
    }

    # Top ten merchants by total spend (grouped on the cleaned merchant name)
    merchant_map: dict[str, dict] = defaultdict(lambda: {'total': 0.0, 'count': 0})
    for t in spend_txns:
        entry = merchant_map[_clean_merchant(t['description'])]
        entry['total'] += t['amount']
        entry['count'] += 1
    top_merchants = sorted(
        ({'name': name, 'total': round(v['total'], 2), 'count': v['count']}
         for name, v in merchant_map.items()),
        key=lambda m: -m['total'],
    )[:10]

    monthly = monthly_breakdown(enriched_transactions)
    weekly = weekly_breakdown(enriched_transactions)
    insights = generate_insights(enriched_transactions, by_category, total_spend)

    # Stable numeric IDs for frontend list keying (existing ids are kept)
    for i, tx in enumerate(enriched_transactions):
        tx.setdefault('id', i)

    return {
        'total_spend': total_spend,
        'total_credits': total_credits,
        'total_payments': total_payments,
        'transaction_count': len(enriched_transactions),
        'spend_count': len(spend_txns),
        'by_category': by_category,
        'category_icons': CATEGORY_ICONS,
        'top_merchants': top_merchants,
        'insights': insights,
        'monthly': monthly,
        'weekly': weekly,
        'transactions': enriched_transactions,
    }
|