Files
amexpal/amex_analyser.py
admin c1e22da9d6 Initial commit — AmexPal statement analyser
Python/Flask backend with pdfplumber parser, Svelte 4 frontend,
Docker multi-stage build. Includes category analysis, insights,
monthly/weekly charts, subscription audit, and annualised projections.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 08:37:01 +13:00

353 lines
13 KiB
Python

"""
AMEX Statement Analyser — categorisation, insights, monthly/weekly breakdown.
"""
import re
from collections import defaultdict
from datetime import datetime
from amex_parser import Transaction
# ── Payment processor prefixes to strip before categorising / displaying ─────
# WINDCAVE* is a common NZ payment gateway used by many merchants
_NOISE_PREFIX_RE = re.compile(
    r'^(WINDCAVE\*|SQ \*|SP |PAYPAL \*|PAYPAL\*)',
    re.IGNORECASE,
)


def normalise(description: str) -> str:
    """Strip payment processor prefixes for cleaner display and matching."""
    without_prefix = _NOISE_PREFIX_RE.sub('', description)
    return without_prefix.strip()
# ── Category rules (checked in order — first match wins) ─────────────────────
# IMPORTANT: Fuel is before Public Transport so petrol stations don't fall into Public Transport.
# Keywords are substring-matched against the upper-cased, normalised
# description (see categorise()), so matching is case-insensitive.
# Trailing spaces on short tokens (e.g. 'BP ', 'TANK ') keep them from
# matching inside longer unrelated words.
CATEGORY_RULES: list[tuple[str, list[str]]] = [
    # Card payments — listed first so they never land in a spend category.
    ('Payments', [
        'PAYMENT - THANK YOU',
    ]),
    ('Groceries', [
        'NEW WORLD', 'COUNTDOWN', 'WOOLWORTHS', 'PAK N SAVE', 'PAKNSAVE',
        'FOUR SQUARE', 'FRESH CHOICE', 'HARRIS FARM', 'MOORE WILSON',
        'FARRO', 'NOSH', 'HUCKLEBERRY', 'COMMONSENSE',
    ]),
    # Broad generic terms ('CAFE', 'GRILL', …) as well as specific chains.
    ('Dining & Takeaway', [
        'MCDONALD', 'UBER EATS', 'ST PIERRE', 'KFC', 'UMU PIZZA',
        'DOMINO', 'WILDFLOUR', 'SUBWAY', 'BURGER KING', 'PIZZA HUT',
        'NOODLE', 'SUSHI', 'RESTAURANT', 'CAFE', 'BAKERY', 'COFFEE',
        'STARBUCKS', 'MUFFIN BREAK', 'THAI', 'INDIAN', 'CHINESE',
        'KEBAB', 'TAKEAWAY', 'PITA PIT', 'OPORTO', "WENDY'S", 'GRILL',
        'BISTRO', 'BARBEQUE', 'BBQ', 'EGGS', 'PANCAKE',
    ]),
    ('Fuel', [
        'MOBIL', 'BP ', 'Z ENERGY', 'CALTEX', 'GULL ', 'CHALLENGE ',
        'NPD ', 'WAITOMO ', 'NIGHT N DAY',
    ]),
    # 'UBER TRIP' / 'UBER*TRIP' is rideshare; distinct from 'UBER EATS' above.
    ('Public Transport', [
        'AT HOP', 'AT METRO', 'AUCKLAND TRANSPORT', 'AUCKLAND COUNCIL TR',
        'PUBLIC TRANSPORT', 'INTERCITY', 'NAKED BUS', 'MANA COACH',
        'TRANZ METRO', 'RITCHIES', 'GO BUS', 'NZ BUS',
        'SNAPPER', 'METLINK', 'EBUS', 'ORBITER',
        'UBER TRIP', 'UBER*TRIP',
    ]),
    ('Utilities', [
        'SPARK', 'SLINGSHOT', 'POWERSHOP', 'CONTACT ENERGY', 'MERCURY ',
        'GENESIS ', 'WATERCARE', 'CHORUS', 'VODAFONE', 'TWO DEGREES',
        '2DEGREES', 'SKINNY', 'TRUSTPOWER', 'ORCON',
    ]),
    ('Subscriptions', [
        'GOOGLE', 'APPLE.COM', 'EMBY', 'MYOB', 'NETFLIX', 'SPOTIFY',
        'MICROSOFT', 'ADOBE', 'DROPBOX', 'AMAZON PRIME', 'DISNEY',
        'SKYDRIVE', 'YOUTUBE', 'ICLOUD', 'GITHUB', 'CANVA', 'XERO',
        'ZOOM', 'SLACK', 'ATLASSIAN', '1PASSWORD', 'LASTPASS', 'NORDVPN',
        'PARAMOUNT', 'BINGE', 'NEON ', 'LIGHTROOM',
    ]),
    ('Health & Pharmacy', [
        'CHEMIST WAREHOUSE', 'PHARMACY', 'UNICHEM', 'LIFE PHARMACY',
        'GREEN CROSS', 'DOCTOR', 'MEDICAL', 'DENTAL', 'DENTIST',
        'OPTOMETRIST', 'PHYSIO', 'HEALTHZONE', 'HEALTH FOOD',
        'SPECSAVERS', 'VISION',
    ]),
    ('Home & Hardware', [
        'BUNNINGS', 'MITRE 10', 'PLACEMAKERS', 'WISELIVING',
        'IKEA', 'FREEDOM ', 'SPOTLIGHT', 'BED BATH', 'ADAIRS',
        'BRISCOES', 'STEVENS ', 'LIVING AND GIVING', 'KMART HOMEWARE',
        'BABY FACTORY', 'BABY CITY',
    ]),
    ('Electronics & Appliances', [
        'HARVEY NORMAN', 'NOEL LEEMING', 'JB HI-FI', 'THE GOOD GUYS',
        'PB TECH', 'COMPUTER LOUNGE', 'MIGHTY APE', 'PLAYTECH',
    ]),
    # 'KMART HOMEWARE' (Home & Hardware) is checked before plain 'KMART' here.
    ('Shopping & Apparel', [
        'KMART', 'THE WAREHOUSE', 'WAREHOUSE STATIONERY', 'FARMERS ',
        'COTTON ON', 'GLASSONS', 'HALLENSTEINS', 'KATHMANDU', 'REBEL ',
        'FOOTLOCKER', 'POSTIE', 'STIRLING SPORTS', 'LULULEMON', 'EZIBUY',
        'WHITCOULLS', 'PAPER PLUS', 'SMITHS CITY', 'HANNAHS',
        'NUMBER ONE SHOES', 'SHOE WAREHOUSE',
    ]),
    ('Entertainment', [
        'HOYTS', 'READING CINEMA', 'EVENT CINEMA', 'TICKETEK',
        'TICKETMASTER', 'EVENTFINDA', 'SKY TV', 'TIMEZONE',
        'LASER STRIKE', 'ARCHIE BROTHERS', 'BOWLING', 'PAINTBALL',
    ]),
    ('Travel & Accommodation', [
        'AIRBNB', 'BOOKING.COM', 'HOTEL', 'MOTEL', 'HOSTEL',
        'AIR NEW ZEALAND', 'JETSTAR', 'QANTAS', 'VIRGIN AUSTRALIA',
        'SCENIC HOTEL', 'HOLIDAY INN', 'NOVOTEL', 'IBIS ',
        'TRIVAGO', 'EXPEDIA',
    ]),
    # Leading spaces (' SALON', ' NAIL ') avoid matching word-internal hits.
    ('Personal Care', [
        'HAIRCUT', 'BARBER', ' SALON', 'DAY SPA', 'BEAUTY', 'LASER ',
        ' NAIL ', 'WAXING', 'MASSAGE',
    ]),
    ('Food & Specialty', [
        'SABATO', 'TANK ', 'ORIGIN COFFEE', 'ATOMIC COFFEE',
    ]),
    ('Pets', [
        'VET ', 'VETCARE', 'PETBARN', 'PET STOCK', 'ANIMATES',
        'HOLLYWOOD FISH', 'PETCO',
    ]),
]
# Emoji badge per category for the frontend. Every category that can come out
# of categorise() — plus the 'Other' fallback — has an entry.
# NOTE(review): 'Fuel' and 'Utilities' mapped to empty strings while every
# other category had an emoji — almost certainly lost in an encoding/paste
# step; restored here.
CATEGORY_ICONS = {
    'Groceries': '🛒',
    'Dining & Takeaway': '🍔',
    'Fuel': '⛽',
    'Public Transport': '🚌',
    'Utilities': '⚡',
    'Subscriptions': '📱',
    'Health & Pharmacy': '💊',
    'Home & Hardware': '🏠',
    'Electronics & Appliances': '💻',
    'Shopping & Apparel': '🛍️',
    'Entertainment': '🎬',
    'Travel & Accommodation': '✈️',
    'Personal Care': '💅',
    'Food & Specialty': '🍷',
    'Pets': '🐾',
    'Other': '📦',
    'Payments': '💳',
}
def categorise(description: str) -> str:
    """Map a raw transaction description to a spending category.

    The description is normalised (processor prefixes stripped) and
    upper-cased, then checked against CATEGORY_RULES in declaration order;
    the first rule with any matching keyword wins. Unmatched descriptions
    fall through to 'Other'.
    """
    # Normalise first so WINDCAVE*DOMINOS → DOMINOS → Dining & Takeaway
    haystack = normalise(description).upper()
    return next(
        (category
         for category, keywords in CATEGORY_RULES
         if any(kw.upper() in haystack for kw in keywords)),
        'Other',
    )
def _parse_date(date_str: str) -> datetime | None:
try:
return datetime.strptime(date_str, '%d.%m.%y')
except ValueError:
return None
def _clean_merchant(description: str) -> str:
    """Normalised merchant name for grouping (strip noise prefix + location suffix)."""
    base = re.sub(r'\s+\d{3,6}$', '', normalise(description)).strip()
    tokens = base.split()
    # Peel trailing ALL-CAPS tokens (store locations etc.), but only while
    # the remaining prefix still contains a known category keyword — i.e.
    # only while we're sure we aren't eating the brand name itself.
    while len(tokens) > 1:
        tail = tokens[-1]
        if not (tail.isupper() and len(tail) <= 15):
            break
        prefix = ' '.join(tokens[:-1]).upper()
        if not any(kw.upper() in prefix for _, kws in CATEGORY_RULES for kw in kws):
            break
        tokens = tokens[:-1]
    return ' '.join(tokens)
def _short_merchant(description: str) -> str:
    """Short, title-cased display name for insights."""
    name = normalise(description)
    # Drop any trailing URL and processor reference tokens like '*TRIP'.
    name, _, _ = name.partition('HTTPS://')
    name = re.sub(r'\*\w+', '', name.strip()).strip()
    return name.title()
def monthly_breakdown(enriched: list[dict]) -> dict:
    """Per-month spend totals broken down by category.

    Credits and transactions with unparseable dates are skipped. Returns
    chronologically-ordered month labels, categories sorted by overall
    spend (largest first), a month→category→amount matrix, and per-month
    totals — all amounts rounded to 2 dp.
    """
    per_month: dict[str, dict[str, float]] = defaultdict(lambda: defaultdict(float))
    sort_keys: dict[str, str] = {}
    for tx in enriched:
        if tx['is_credit']:
            continue
        when = _parse_date(tx['date'])
        if when is None:
            continue
        label = when.strftime('%b %Y')
        sort_keys[label] = when.strftime('%Y-%m')
        per_month[label][tx['category']] += tx['amount']
    months = sorted(per_month, key=sort_keys.__getitem__)
    categories = sorted(
        {cat for bucket in per_month.values() for cat in bucket},
        key=lambda cat: -sum(per_month[m].get(cat, 0) for m in months),
    )
    return {
        'months': months,
        'categories': categories,
        'by_month': {
            m: {cat: round(per_month[m].get(cat, 0), 2) for cat in categories}
            for m in months
        },
        'totals': {m: round(sum(per_month[m].values()), 2) for m in months},
    }
def weekly_breakdown(enriched: list[dict]) -> dict:
    """Spend totals bucketed by week-of-month (days 1-7 → Week 1, 8-14 → Week 2, …).

    Credits and unparseable dates are skipped; totals rounded to 2 dp.
    """
    buckets: dict[str, float] = defaultdict(float)
    for tx in enriched:
        if tx['is_credit']:
            continue
        when = _parse_date(tx['date'])
        if when is None:
            continue
        label = 'Week %d' % ((when.day - 1) // 7 + 1)
        buckets[label] += tx['amount']
    ordered = sorted(buckets)
    return {'weeks': ordered, 'totals': {w: round(buckets[w], 2) for w in ordered}}
def _unique_merchants(txns: list[dict], limit: int) -> str:
    """Comma-joined unique merchant display names, first-seen order, capped at *limit*."""
    names = list(dict.fromkeys(_short_merchant(t['description']) for t in txns))
    return ', '.join(names[:limit])


def generate_insights(enriched: list[dict], by_category: dict, total_spend: float) -> list[dict]:
    """Build the insight cards shown on the dashboard.

    Each card is a dict with 'icon', 'title', 'stat', 'detail' and 'color'.
    Only non-credit transactions are considered; cards whose category has no
    transactions are omitted, so empty input yields an empty list.

    Args:
        enriched: transaction dicts with 'date', 'description', 'amount',
            'is_credit' and 'category' keys.
        by_category: per-category spend totals (as produced by analyse()).
        total_spend: overall debit total, used for percentage / daily stats.
    """
    spend_txns = [t for t in enriched if not t['is_credit']]
    insights = []
    # Grocery trips
    grocery_txns = [t for t in spend_txns if t['category'] == 'Groceries']
    if grocery_txns:
        total = by_category.get('Groceries', 0)
        count = len(grocery_txns)
        avg = total / count if count else 0
        pct = (total / total_spend * 100) if total_spend else 0
        insights.append({
            'icon': '🛒', 'title': 'Grocery Shopping',
            'stat': f'${total:.2f}',
            'detail': f'{count} trips · avg ${avg:.0f} each · {pct:.0f}% of spend',
            'color': '#27ae60',
        })
    # Dining & takeaway
    dining_txns = [t for t in spend_txns if t['category'] == 'Dining & Takeaway']
    if dining_txns:
        total = by_category.get('Dining & Takeaway', 0)
        count = len(dining_txns)
        insights.append({
            'icon': '🍔', 'title': 'Dining & Takeaway',
            'stat': f'${total:.2f}',
            'detail': f'{count} orders this period',
            'color': '#e67e22',
        })
    # Subscriptions
    sub_txns = [t for t in spend_txns if t['category'] == 'Subscriptions']
    if sub_txns:
        total = by_category.get('Subscriptions', 0)
        insights.append({
            'icon': '📱', 'title': 'Subscriptions',
            'stat': f'${total:.2f}',
            'detail': _unique_merchants(sub_txns, 5),
            'color': '#8e44ad',
        })
    # Utilities — icon was '' (emoji evidently lost in transit); restored to
    # match CATEGORY_ICONS' convention of one emoji per card.
    util_txns = [t for t in spend_txns if t['category'] == 'Utilities']
    if util_txns:
        total = by_category.get('Utilities', 0)
        insights.append({
            'icon': '⚡', 'title': 'Utilities',
            'stat': f'${total:.2f}',
            'detail': _unique_merchants(util_txns, 4),
            'color': '#f39c12',
        })
    # Biggest single purchase
    if spend_txns:
        biggest = max(spend_txns, key=lambda t: t['amount'])
        insights.append({
            'icon': '💳', 'title': 'Largest Purchase',
            'stat': f'${biggest["amount"]:.2f}',
            'detail': _short_merchant(biggest['description']),
            'color': '#e74c3c',
        })
    # Daily average — counts only days with at least one debit transaction.
    active_days = len({t['date'] for t in spend_txns})
    if active_days > 0 and total_spend > 0:
        insights.append({
            'icon': '📅', 'title': 'Daily Average',
            'stat': f'${total_spend / active_days:.2f}',
            'detail': f'Across {active_days} active spending days',
            'color': '#006fcf',
        })
    return insights
def analyse(enriched_transactions: list[dict] | None = None,
            transactions: list[Transaction] | None = None) -> dict:
    """Accept either pre-enriched dicts or raw Transaction objects.

    Returns the full analysis payload for the frontend: totals, category
    breakdown, top merchants, insight cards, monthly/weekly charts and the
    (id-stamped) transaction list itself.
    """
    if enriched_transactions is None:
        # Enrich raw parser output: normalise the description for display
        # but categorise on the raw text (categorise normalises internally).
        enriched_transactions = [
            {
                'date': tx.date,
                'description': normalise(tx.description),
                'amount': tx.amount,
                'is_credit': tx.is_credit,
                'category': categorise(tx.description),
            }
            for tx in (transactions or [])
        ]

    spend_txns = [t for t in enriched_transactions if not t['is_credit']]
    credit_txns = [t for t in enriched_transactions if t['is_credit']]
    payment_txns = [t for t in credit_txns if t['category'] == 'Payments']

    total_spend = round(sum(t['amount'] for t in spend_txns), 2)
    total_credits = round(sum(t['amount'] for t in credit_txns), 2)
    total_payments = round(sum(t['amount'] for t in payment_txns), 2)

    # Category totals, largest first.
    cat_totals: dict[str, float] = defaultdict(float)
    for t in spend_txns:
        cat_totals[t['category']] += t['amount']
    by_category = {
        cat: round(amount, 2)
        for cat, amount in sorted(cat_totals.items(), key=lambda item: -item[1])
    }

    # Aggregate spend per cleaned merchant name, then keep the top ten.
    merchant_totals: dict[str, float] = defaultdict(float)
    merchant_counts: dict[str, int] = defaultdict(int)
    for t in spend_txns:
        merchant = _clean_merchant(t['description'])
        merchant_totals[merchant] += t['amount']
        merchant_counts[merchant] += 1
    top_merchants = sorted(
        (
            {'name': m, 'total': round(merchant_totals[m], 2), 'count': merchant_counts[m]}
            for m in merchant_totals
        ),
        key=lambda entry: -entry['total'],
    )[:10]

    monthly = monthly_breakdown(enriched_transactions)
    weekly = weekly_breakdown(enriched_transactions)
    insights = generate_insights(enriched_transactions, by_category, total_spend)

    # Stable numeric IDs for frontend list keying
    for idx, tx in enumerate(enriched_transactions):
        tx.setdefault('id', idx)

    return {
        'total_spend': total_spend,
        'total_credits': total_credits,
        'total_payments': total_payments,
        'transaction_count': len(enriched_transactions),
        'spend_count': len(spend_txns),
        'by_category': by_category,
        'category_icons': CATEGORY_ICONS,
        'top_merchants': top_merchants,
        'insights': insights,
        'monthly': monthly,
        'weekly': weekly,
        'transactions': enriched_transactions,
    }