Files
gw/backend/app/routers/analytics.py
T
ponzischeme89 6d44e05de4 v1
2026-04-18 07:23:55 +12:00

203 lines
6.5 KiB
Python

import hashlib
import ipaddress
import logging
import secrets

import httpx
import user_agents
from fastapi import APIRouter, Depends, Request, Response
from sqlalchemy.ext.asyncio import AsyncSession

from app.auth.deps import get_current_user
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.schemas.analytics import AnalyticsSummary, BookingOperationsSummary, EventCreate
from app.services.analytics import get_booking_operations_summary, get_summary, record_event
# Router that the endpoints in this module register themselves on.
router = APIRouter(tags=["Analytics"])

# Name of the cookie that carries the anonymous session id.
ANON_COOKIE_NAME = "__gw_anon"
# Cookie lifetime in seconds (365 days).
ANON_COOKIE_MAX_AGE = 60 * 60 * 24 * 365

# Allow-list of metadata keys accepted from clients; _sanitize_client_metadata
# drops every key that is not listed here.
CLIENT_METADATA_KEYS = {
    "area",
    "channel",
    "destination",
    "menu",
    "plan",
    "popular",
    "price",
    "unit",
    "variant",
}

# Textual address prefixes that _geo_lookup refuses to send to the geo service:
# IPv4 loopback, the 10/8, 172.16/12 and 192.168/16 private ranges, the IPv6
# loopback, and the literal host name "localhost".
_PRIVATE_PREFIXES = ("127.", "10.", "172.16.", "172.17.", "172.18.", "172.19.",
                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.", "172.25.",
                     "172.26.", "172.27.", "172.28.", "172.29.", "172.30.", "172.31.",
                     "192.168.", "::1", "localhost")
def _mask_ip(ip: str) -> str:
    """Return a privacy-safe partial form of *ip*.

    IPv4 addresses keep their first three octets and get ``x`` as the last one
    (``203.0.113.7`` -> ``203.0.113.x``). IPv6 addresses keep the first four
    16-bit groups of the *expanded* address followed by ``:x``
    (``2001:db8::1`` -> ``2001:db8:0:0:x``). IPv4-mapped IPv6 addresses
    (``::ffff:a.b.c.d``) are masked like the IPv4 address they embed.

    Args:
        ip: Textual IP address, typically taken from a request header.

    Returns:
        The masked address. Input that is not a valid IP literal goes through
        the legacy text-based masking and may be returned unchanged.
    """
    try:
        addr = ipaddress.ip_address(ip.strip())
    except ValueError:
        addr = None

    if addr is not None:
        if addr.version == 6:
            embedded_v4 = addr.ipv4_mapped
            if embedded_v4 is None:
                # Work on the expanded form: splitting the compressed text
                # ("2001:db8::1") yields fewer than five groups, so keeping
                # "the first four" would return the whole address unmasked.
                groups = addr.exploded.split(":")[:4]
                return ":".join(format(int(g, 16), "x") for g in groups) + ":x"
            # "::ffff:1.2.3.4" would otherwise leak the full IPv4 address.
            addr = embedded_v4
        octets = str(addr).split(".")
        return f"{octets[0]}.{octets[1]}.{octets[2]}.x"

    # Legacy text-based handling, kept so unparseable input is treated exactly
    # as before.
    if ":" in ip:
        parts = ip.split(":")
        return ":".join(parts[:4]) + ":x"
    parts = ip.split(".")
    if len(parts) == 4:
        return f"{parts[0]}.{parts[1]}.{parts[2]}.x"
    return ip
def _get_client_ip(request: Request) -> str | None:
    """Pick the most plausible client address for *request*.

    Lookup order: the left-most entry of ``X-Forwarded-For``, then
    ``X-Real-IP``, then the address of the connected peer. Returns ``None``
    when none of these is available.
    """
    headers = request.headers

    forwarded_chain = headers.get("x-forwarded-for")
    if forwarded_chain:
        # The header is a comma-separated list; only its first entry is used.
        leftmost, _, _ = forwarded_chain.partition(",")
        leftmost = leftmost.strip()
        if leftmost:
            return leftmost

    proxy_reported = headers.get("x-real-ip")
    if proxy_reported:
        return proxy_reported.strip()

    peer = request.client
    return peer.host if peer else None
def _should_secure_cookie(request: Request) -> bool:
    """Decide whether the session cookie should carry the ``Secure`` flag.

    Returns ``True`` when the request reached the application over HTTPS, or
    when the first ``X-Forwarded-Proto`` entry says the client-facing hop was
    HTTPS. Plain-HTTP requests without that header (e.g. localhost
    development) get ``False`` so the cookie still works there.
    """
    if request.url.scheme == "https":
        return True
    # Behind a TLS-terminating proxy the application sees "http" unless the
    # server is configured to rewrite the scheme, so consult the forwarded
    # protocol as well. This module already relies on X-Forwarded-For for the
    # client address.
    forwarded_proto = request.headers.get("x-forwarded-proto") or ""
    return forwarded_proto.split(",")[0].strip().lower() == "https"
def _sanitize_client_metadata(metadata: dict | None) -> dict | None:
    """Reduce client-supplied metadata to allow-listed, flat scalar labels.

    Only string keys present in ``CLIENT_METADATA_KEYS`` survive. String values
    are cut to 120 characters; bool, int and float values pass through as-is;
    values of any other type are dropped. Returns ``None`` when the input is
    empty or nothing survives the filtering.
    """
    if not metadata:
        return None

    sanitized: dict[str, str | int | float | bool] = {}
    for name, raw in metadata.items():
        if not (isinstance(name, str) and name in CLIENT_METADATA_KEYS):
            continue
        if isinstance(raw, str):
            sanitized[name] = raw[:120]
        elif isinstance(raw, (bool, int, float)):
            sanitized[name] = raw

    return sanitized if sanitized else None
def _get_or_create_session_id(request: Request, response: Response, payload_session_id: str | None) -> str:
    """Resolve the anonymous session id and keep the cookie in sync with it.

    Precedence: the value of the existing anonymous cookie, then the id sent in
    the request payload, then a freshly generated random token. Whenever the
    chosen id differs from what the cookie currently holds, the cookie is
    (re)issued on *response*.
    """
    from_cookie = request.cookies.get(ANON_COOKIE_NAME)

    if from_cookie:
        session_id = from_cookie
    elif payload_session_id:
        session_id = payload_session_id
    else:
        session_id = secrets.token_urlsafe(24)

    # Nothing to write when the browser already holds this exact id.
    if from_cookie == session_id:
        return session_id

    response.set_cookie(
        key=ANON_COOKIE_NAME,
        value=session_id,
        max_age=ANON_COOKIE_MAX_AGE,
        httponly=True,
        samesite="lax",
        secure=_should_secure_cookie(request),
        path="/",
    )
    return session_id
def _parse_ua(ua_string: str) -> tuple[str | None, str | None]:
"""Parse a User-Agent string into (browser, os_name)."""
if not ua_string:
return None, None
ua = user_agents.parse(ua_string)
browser = ua.browser.family
if browser and browser != "Other" and ua.browser.version_string:
major = ua.browser.version_string.split(".")[0]
browser = f"{browser} {major}"
os_name = ua.os.family
if os_name and os_name != "Other" and ua.os.version_string:
os_name = f"{os_name} {ua.os.version_string}"
return (
None if not browser or browser == "Other" else browser[:100],
None if not os_name or os_name == "Other" else os_name[:100],
)
async def _geo_lookup(ip: str) -> tuple[str | None, str | None]:
"""Resolve IP to (country, city) via ip-api.com. Returns (None, None) on failure."""
if not ip or any(ip.startswith(p) for p in _PRIVATE_PREFIXES):
return None, None
try:
async with httpx.AsyncClient(timeout=2.0) as client:
r = await client.get(
f"http://ip-api.com/json/{ip}",
params={"fields": "status,country,city"},
)
if r.status_code == 200:
d = r.json()
if d.get("status") == "success":
return d.get("country"), d.get("city")
except Exception:
pass
return None, None
@router.post("/api/web/event", status_code=201)
@router.post("/api/analytics/event", status_code=201)
@limiter.limit("60/minute")
async def ingest_event(
    request: Request,
    response: Response,
    data: EventCreate,
    db: AsyncSession = Depends(get_db),
):
    """Store one telemetry event enriched with request-derived context.

    Public endpoint (no auth dependency), registered on two paths and limited
    to 60 requests per minute by the limiter decorator. Responds with
    ``{"ok": True}`` and status 201.
    """
    headers = request.headers

    # Network identity: record_event receives only a truncated SHA-256 digest
    # and a masked form of the address.
    client_ip = _get_client_ip(request)
    if client_ip:
        ip_digest = hashlib.sha256(client_ip.encode()).hexdigest()[:16]
        ip_masked = _mask_ip(client_ip)
    else:
        ip_digest = ip_masked = None

    # Device and coarse location context.
    raw_user_agent = headers.get("User-Agent", "")
    browser, os_name = _parse_ua(raw_user_agent)
    country, city = await _geo_lookup(client_ip or "")

    # The session id chosen by the server replaces whatever the payload sent.
    session_id = _get_or_create_session_id(request, response, data.session_id)

    labels = _sanitize_client_metadata(data.metadata) or {}
    referring_page = headers.get("referer")
    if referring_page:
        labels["referrer"] = referring_page[:255]

    event = data.model_copy(
        update={"session_id": session_id, "metadata": labels or None},
    )
    await record_event(
        db,
        event,
        ip_hash=ip_digest,
        ip_partial=ip_masked,
        user_agent=raw_user_agent[:512] or None,
        browser=browser,
        os_name=os_name,
        country=country,
        city=city,
    )
    return {"ok": True}
@router.get("/api/v1/analytics/summary", response_model=AnalyticsSummary)
async def analytics_summary(
    db: AsyncSession = Depends(get_db),
    _=Depends(get_current_user),
):
    """Serve the analytics summary; the caller must pass the current-user dependency."""
    summary = await get_summary(db)
    return summary
@router.get("/api/v1/analytics/bookings-summary", response_model=BookingOperationsSummary)
async def booking_operations_summary(
    db: AsyncSession = Depends(get_db),
    _=Depends(get_current_user),
):
    """Serve the booking operations report; the caller must pass the current-user dependency."""
    report = await get_booking_operations_summary(db)
    return report