""" AMEX Statement Analyser — categorisation, insights, monthly/weekly breakdown. """ import re from collections import defaultdict from datetime import datetime from amex_parser import Transaction # ── Payment processor prefixes to strip before categorising / displaying ───── # WINDCAVE* is a common NZ payment gateway used by many merchants _NOISE_PREFIX_RE = re.compile( r'^(WINDCAVE\*|SQ \*|SP |PAYPAL \*|PAYPAL\*)', re.IGNORECASE, ) def normalise(description: str) -> str: """Strip payment processor prefixes for cleaner display and matching.""" return _NOISE_PREFIX_RE.sub('', description).strip() # ── Category rules (checked in order — first match wins) ───────────────────── # IMPORTANT: Fuel is before Public Transport so petrol stations don't fall into Public Transport. CATEGORY_RULES: list[tuple[str, list[str]]] = [ ('Payments', [ 'PAYMENT - THANK YOU', ]), ('Groceries', [ 'NEW WORLD', 'COUNTDOWN', 'WOOLWORTHS', 'PAK N SAVE', 'PAKNSAVE', 'FOUR SQUARE', 'FRESH CHOICE', 'HARRIS FARM', 'MOORE WILSON', 'FARRO', 'NOSH', 'HUCKLEBERRY', 'COMMONSENSE', ]), ('Dining & Takeaway', [ 'MCDONALD', 'UBER EATS', 'ST PIERRE', 'KFC', 'UMU PIZZA', 'DOMINO', 'WILDFLOUR', 'SUBWAY', 'BURGER KING', 'PIZZA HUT', 'NOODLE', 'SUSHI', 'RESTAURANT', 'CAFE', 'BAKERY', 'COFFEE', 'STARBUCKS', 'MUFFIN BREAK', 'THAI', 'INDIAN', 'CHINESE', 'KEBAB', 'TAKEAWAY', 'PITA PIT', 'OPORTO', "WENDY'S", 'GRILL', 'BISTRO', 'BARBEQUE', 'BBQ', 'EGGS', 'PANCAKE', ]), ('Fuel', [ 'MOBIL', 'BP ', 'Z ENERGY', 'CALTEX', 'GULL ', 'CHALLENGE ', 'NPD ', 'WAITOMO ', 'NIGHT N DAY', ]), ('Public Transport', [ 'AT HOP', 'AT METRO', 'AUCKLAND TRANSPORT', 'AUCKLAND COUNCIL TR', 'PUBLIC TRANSPORT', 'INTERCITY', 'NAKED BUS', 'MANA COACH', 'TRANZ METRO', 'RITCHIES', 'GO BUS', 'NZ BUS', 'SNAPPER', 'METLINK', 'EBUS', 'ORBITER', 'UBER TRIP', 'UBER*TRIP', ]), ('Utilities', [ 'SPARK', 'SLINGSHOT', 'POWERSHOP', 'CONTACT ENERGY', 'MERCURY ', 'GENESIS ', 'WATERCARE', 'CHORUS', 'VODAFONE', 'TWO DEGREES', '2DEGREES', 'SKINNY', 'TRUSTPOWER', 'ORCON', ]), ('Subscriptions', [ 'GOOGLE', 'APPLE.COM', 'EMBY', 'MYOB', 'NETFLIX', 'SPOTIFY', 'MICROSOFT', 'ADOBE', 'DROPBOX', 'AMAZON PRIME', 'DISNEY', 'SKYDRIVE', 'YOUTUBE', 'ICLOUD', 'GITHUB', 'CANVA', 'XERO', 'ZOOM', 'SLACK', 'ATLASSIAN', '1PASSWORD', 'LASTPASS', 'NORDVPN', 'PARAMOUNT', 'BINGE', 'NEON ', 'LIGHTROOM', ]), ('Health & Pharmacy', [ 'CHEMIST WAREHOUSE', 'PHARMACY', 'UNICHEM', 'LIFE PHARMACY', 'GREEN CROSS', 'DOCTOR', 'MEDICAL', 'DENTAL', 'DENTIST', 'OPTOMETRIST', 'PHYSIO', 'HEALTHZONE', 'HEALTH FOOD', 'SPECSAVERS', 'VISION', ]), ('Home & Hardware', [ 'BUNNINGS', 'MITRE 10', 'PLACEMAKERS', 'WISELIVING', 'IKEA', 'FREEDOM ', 'SPOTLIGHT', 'BED BATH', 'ADAIRS', 'BRISCOES', 'STEVENS ', 'LIVING AND GIVING', 'KMART HOMEWARE', 'BABY FACTORY', 'BABY CITY', ]), ('Electronics & Appliances', [ 'HARVEY NORMAN', 'NOEL LEEMING', 'JB HI-FI', 'THE GOOD GUYS', 'PB TECH', 'COMPUTER LOUNGE', 'MIGHTY APE', 'PLAYTECH', ]), ('Shopping & Apparel', [ 'KMART', 'THE WAREHOUSE', 'WAREHOUSE STATIONERY', 'FARMERS ', 'COTTON ON', 'GLASSONS', 'HALLENSTEINS', 'KATHMANDU', 'REBEL ', 'FOOTLOCKER', 'POSTIE', 'STIRLING SPORTS', 'LULULEMON', 'EZIBUY', 'WHITCOULLS', 'PAPER PLUS', 'SMITHS CITY', 'HANNAHS', 'NUMBER ONE SHOES', 'SHOE WAREHOUSE', ]), ('Entertainment', [ 'HOYTS', 'READING CINEMA', 'EVENT CINEMA', 'TICKETEK', 'TICKETMASTER', 'EVENTFINDA', 'SKY TV', 'TIMEZONE', 'LASER STRIKE', 'ARCHIE BROTHERS', 'BOWLING', 'PAINTBALL', ]), ('Travel & Accommodation', [ 'AIRBNB', 'BOOKING.COM', 'HOTEL', 'MOTEL', 'HOSTEL', 'AIR NEW ZEALAND', 'JETSTAR', 'QANTAS', 'VIRGIN AUSTRALIA', 'SCENIC HOTEL', 'HOLIDAY INN', 'NOVOTEL', 'IBIS ', 'TRIVAGO', 'EXPEDIA', ]), ('Personal Care', [ 'HAIRCUT', 'BARBER', ' SALON', 'DAY SPA', 'BEAUTY', 'LASER ', ' NAIL ', 'WAXING', 'MASSAGE', ]), ('Food & Specialty', [ 'SABATO', 'TANK ', 'ORIGIN COFFEE', 'ATOMIC COFFEE', ]), ('Pets', [ 'VET ', 'VETCARE', 'PETBARN', 'PET STOCK', 'ANIMATES', 'HOLLYWOOD FISH', 'PETCO', ]), ] CATEGORY_ICONS = { 'Groceries': '🛒', 'Dining & Takeaway': '🍔', 'Fuel': '⛽', 'Public Transport': '🚌', 'Utilities': '⚡', 'Subscriptions': '📱', 'Health & Pharmacy': '💊', 'Home & Hardware': '🏠', 'Electronics & Appliances':'💻', 'Shopping & Apparel': '🛍️', 'Entertainment': '🎬', 'Travel & Accommodation': '✈️', 'Personal Care': '💅', 'Food & Specialty': '🍷', 'Pets': '🐾', 'Other': '📦', 'Payments': '💳', } def categorise(description: str) -> str: # Normalise first so WINDCAVE*DOMINOS → DOMINOS → Dining & Takeaway clean = normalise(description).upper() for category, keywords in CATEGORY_RULES: for kw in keywords: if kw.upper() in clean: return category return 'Other' def _parse_date(date_str: str) -> datetime | None: try: return datetime.strptime(date_str, '%d.%m.%y') except ValueError: return None def _clean_merchant(description: str) -> str: """Normalised merchant name for grouping (strip noise prefix + location suffix).""" name = normalise(description) # Strip trailing store-number IDs like "9518", "8212" name = re.sub(r'\s+\d{3,6}$', '', name).strip() # Strip trailing all-caps location tokens that aren't the brand name parts = name.split() while len(parts) > 1 and parts[-1].isupper() and len(parts[-1]) <= 15: candidate = ' '.join(parts[:-1]) if any(kw.upper() in candidate.upper() for _, kws in CATEGORY_RULES for kw in kws): parts = parts[:-1] else: break return ' '.join(parts) def _short_merchant(description: str) -> str: """Short display name for insights.""" name = normalise(description) name = name.split('HTTPS://')[0].strip() name = re.sub(r'\*\w+', '', name).strip() return name.title() def monthly_breakdown(enriched: list[dict]) -> dict: month_data: dict[str, dict[str, float]] = defaultdict(lambda: defaultdict(float)) month_sort: dict[str, str] = {} for tx in enriched: if tx['is_credit']: continue dt = _parse_date(tx['date']) if not dt: continue label = dt.strftime('%b %Y') sort_key = dt.strftime('%Y-%m') month_data[label][tx['category']] += tx['amount'] month_sort[label] = sort_key months = sorted(month_data.keys(), key=lambda m: month_sort[m]) all_cats = sorted( {c for m in months for c in month_data[m]}, key=lambda c: -sum(month_data[m].get(c, 0) for m in months), ) return { 'months': months, 'categories': all_cats, 'by_month': {m: {c: round(month_data[m].get(c, 0), 2) for c in all_cats} for m in months}, 'totals': {m: round(sum(month_data[m].values()), 2) for m in months}, } def weekly_breakdown(enriched: list[dict]) -> dict: week_data: dict[str, float] = defaultdict(float) for tx in enriched: if tx['is_credit']: continue dt = _parse_date(tx['date']) if not dt: continue week_num = (dt.day - 1) // 7 + 1 week_data[f'Week {week_num}'] += tx['amount'] weeks = sorted(week_data.keys()) return {'weeks': weeks, 'totals': {w: round(week_data[w], 2) for w in weeks}} def generate_insights(enriched: list[dict], by_category: dict, total_spend: float) -> list[dict]: spend_txns = [t for t in enriched if not t['is_credit']] insights = [] # Grocery trips grocery_txns = [t for t in spend_txns if t['category'] == 'Groceries'] if grocery_txns: total = by_category.get('Groceries', 0) count = len(grocery_txns) avg = total / count if count else 0 pct = (total / total_spend * 100) if total_spend else 0 insights.append({ 'icon': '🛒', 'title': 'Grocery Shopping', 'stat': f'${total:.2f}', 'detail': f'{count} trips · avg ${avg:.0f} each · {pct:.0f}% of spend', 'color': '#27ae60', }) # Dining & takeaway dining_txns = [t for t in spend_txns if t['category'] == 'Dining & Takeaway'] if dining_txns: total = by_category.get('Dining & Takeaway', 0) count = len(dining_txns) insights.append({ 'icon': '🍔', 'title': 'Dining & Takeaway', 'stat': f'${total:.2f}', 'detail': f'{count} orders this period', 'color': '#e67e22', }) # Subscriptions sub_txns = [t for t in spend_txns if t['category'] == 'Subscriptions'] if sub_txns: total = by_category.get('Subscriptions', 0) names = list(dict.fromkeys(_short_merchant(t['description']) for t in sub_txns)) insights.append({ 'icon': '📱', 'title': 'Subscriptions', 'stat': f'${total:.2f}', 'detail': ', '.join(names[:5]), 'color': '#8e44ad', }) # Utilities util_txns = [t for t in spend_txns if t['category'] == 'Utilities'] if util_txns: total = by_category.get('Utilities', 0) names = list(dict.fromkeys(_short_merchant(t['description']) for t in util_txns)) insights.append({ 'icon': '⚡', 'title': 'Utilities', 'stat': f'${total:.2f}', 'detail': ', '.join(names[:4]), 'color': '#f39c12', }) # Biggest single purchase if spend_txns: biggest = max(spend_txns, key=lambda t: t['amount']) insights.append({ 'icon': '💳', 'title': 'Largest Purchase', 'stat': f'${biggest["amount"]:.2f}', 'detail': _short_merchant(biggest['description']), 'color': '#e74c3c', }) # Daily average active_days = len({t['date'] for t in spend_txns}) if active_days > 0 and total_spend > 0: insights.append({ 'icon': '📅', 'title': 'Daily Average', 'stat': f'${total_spend / active_days:.2f}', 'detail': f'Across {active_days} active spending days', 'color': '#006fcf', }) return insights def analyse(enriched_transactions: list[dict] | None = None, transactions: list[Transaction] | None = None) -> dict: """Accept either pre-enriched dicts or raw Transaction objects.""" if enriched_transactions is None: enriched_transactions = [] for tx in (transactions or []): enriched_transactions.append({ 'date': tx.date, 'description': normalise(tx.description), 'amount': tx.amount, 'is_credit': tx.is_credit, 'category': categorise(tx.description), }) spend_txns = [t for t in enriched_transactions if not t['is_credit']] credit_txns = [t for t in enriched_transactions if t['is_credit']] payment_txns = [t for t in credit_txns if t['category'] == 'Payments'] total_spend = round(sum(t['amount'] for t in spend_txns), 2) total_credits = round(sum(t['amount'] for t in credit_txns), 2) total_payments = round(sum(t['amount'] for t in payment_txns), 2) by_category: dict[str, float] = defaultdict(float) for t in spend_txns: by_category[t['category']] += t['amount'] by_category = {k: round(v, 2) for k, v in sorted(by_category.items(), key=lambda x: -x[1])} merchant_map: dict[str, dict] = defaultdict(lambda: {'total': 0.0, 'count': 0}) for t in spend_txns: name = _clean_merchant(t['description']) merchant_map[name]['total'] += t['amount'] merchant_map[name]['count'] += 1 top_merchants = sorted( [{'name': k, 'total': round(v['total'], 2), 'count': v['count']} for k, v in merchant_map.items()], key=lambda x: -x['total'], )[:10] monthly = monthly_breakdown(enriched_transactions) weekly = weekly_breakdown(enriched_transactions) insights = generate_insights(enriched_transactions, by_category, total_spend) # Stable numeric IDs for frontend list keying for i, tx in enumerate(enriched_transactions): tx.setdefault('id', i) return { 'total_spend': total_spend, 'total_credits': total_credits, 'total_payments': total_payments, 'transaction_count': len(enriched_transactions), 'spend_count': len(spend_txns), 'by_category': by_category, 'category_icons': CATEGORY_ICONS, 'top_merchants': top_merchants, 'insights': insights, 'monthly': monthly, 'weekly': weekly, 'transactions': enriched_transactions, }