tweaks

2026-05-31 20:19:44 +12:00
parent 2f2466ecac
commit 84792c0947
59 changed files with 5412 additions and 898 deletions
@@ -0,0 +1,348 @@
+from __future__ import annotations
+
+import logging
+import math
+from collections import Counter
+from datetime import date, datetime
+from pathlib import Path
+from typing import Iterable
+
+from openpyxl import load_workbook
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.models.throughput import ProductionThroughput, ThroughputProduct
+
+# Module-level logger registered under a fixed name (not derived from __name__).
+logger = logging.getLogger("data_entry_app.throughput")
+
+# Worksheet titles looked up in the workbook by the import functions in this
+# module; when a sheet is absent the corresponding import returns (0, 0).
+PRODUCTION_SHEET = "Production"
+NAMES_SHEET = "Names"
+
+# Anything at or above this kg/bag is treated as a bulka batch, not a per-bag count.
+_BULKA_BAG_SIZE_THRESHOLD = 100.0
+
+
+def normalise_staff_name(value: object) -> str | None:
+    if value is None:
+        return None
+    text = str(value).strip()
+    if not text:
+        return None
+    # Collapse internal whitespace, title-case for consistency.
+    cleaned = " ".join(text.split())
+    return cleaned
+
+
+def calculate_kg(quantity: float | None, quantity_type: str, bag_size: float | None) -> float:
+    if quantity is None:
+        return 0.0
+    if quantity_type == "kg":
+        return float(quantity)
+    if bag_size is None:
+        return 0.0
+    return float(quantity) * float(bag_size)
+
+
+def qa_passed(entry: ProductionThroughput) -> bool:
+    return bool(entry.scales_checked and entry.label_correct and entry.bag_sealed and entry.pallet_good_condition)
+
+
+def serialize_entry(entry: ProductionThroughput) -> dict:
+    return {
+        "id": entry.id,
+        "tenant_id": entry.tenant_id,
+        "production_date": entry.production_date,
+        "product_id": entry.product_id,
+        "product_name_snapshot": entry.product_name_snapshot,
+        "bag_size": entry.bag_size,
+        "scales_checked": entry.scales_checked,
+        "label_correct": entry.label_correct,
+        "bag_sealed": entry.bag_sealed,
+        "pallet_good_condition": entry.pallet_good_condition,
+        "sample_box_no": entry.sample_box_no,
+        "test_weight_1": entry.test_weight_1,
+        "test_weight_2": entry.test_weight_2,
+        "test_weight_3": entry.test_weight_3,
+        "test_weight_4": entry.test_weight_4,
+        "test_weight_5": entry.test_weight_5,
+        "quantity": entry.quantity,
+        "quantity_type": entry.quantity_type,
+        "calculated_kg": entry.calculated_kg,
+        "staff_name": entry.staff_name,
+        "notes": entry.notes,
+        "qa_passed": qa_passed(entry),
+        "created_by": entry.created_by,
+        "created_at": entry.created_at,
+        "updated_at": entry.updated_at,
+    }
+
+
+def _coerce_bool(value: object) -> bool:
+    if isinstance(value, bool):
+        return value
+    if value is None:
+        return True
+    if isinstance(value, (int, float)):
+        return bool(value)
+    text = str(value).strip().lower()
+    if text in {"yes", "y", "true", "1", "pass", "ok", "x", "checked"}:
+        return True
+    if text in {"no", "n", "false", "0", "fail"}:
+        return False
+    return True
+
+
+def _coerce_float(value: object) -> float | None:
+    if value is None or value == "":
+        return None
+    if isinstance(value, bool):
+        return float(value)
+    if isinstance(value, (int, float)):
+        return float(value)
+    text = str(value).strip().replace(",", "")
+    if not text:
+        return None
+    try:
+        return float(text)
+    except ValueError:
+        return None
+
+
+def _coerce_text(value: object) -> str | None:
+    if value is None:
+        return None
+    text = str(value).strip()
+    if not text or text.lower() in {"#value!", "#n/a", "n/a"}:
+        return None
+    return text
+
+
+def _coerce_date(value: object) -> date | None:
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        return value.date()
+    if isinstance(value, date):
+        return value
+    text = str(value).strip()
+    if not text:
+        return None
+    for fmt in ("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y"):
+        try:
+            return datetime.strptime(text, fmt).date()
+        except ValueError:
+            continue
+    return None
+
+
+def _infer_bulka_default(name: str, bag_size: float | None) -> bool:
+    lowered = name.lower()
+    if "bulka" in lowered:
+        return True
+    if bag_size is None:
+        return False
+    return bag_size >= _BULKA_BAG_SIZE_THRESHOLD
+
+
+def import_names_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
+    """Upsert product master from the Names sheet. Returns (created, updated)."""
+    if NAMES_SHEET not in workbook.sheetnames:
+        return (0, 0)
+
+    ws = workbook[NAMES_SHEET]
+    existing: dict[tuple[str, str | None], ThroughputProduct] = {}
+    by_item: dict[str, ThroughputProduct] = {}
+    by_name: dict[str, ThroughputProduct] = {}
+    for product in db.scalars(
+        select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
+    ).all():
+        if product.item_id:
+            by_item[str(product.item_id)] = product
+        by_name[product.name.lower()] = product
+
+    created = 0
+    updated = 0
+    for row in ws.iter_rows(min_row=2, values_only=True):
+        if not row:
+            continue
+        name = _coerce_text(row[0] if len(row) > 0 else None)
+        if not name:
+            continue
+        item_id_raw = row[1] if len(row) > 1 else None
+        item_id = None
+        if item_id_raw is not None:
+            if isinstance(item_id_raw, float) and item_id_raw.is_integer():
+                item_id = str(int(item_id_raw))
+            else:
+                item_id = _coerce_text(item_id_raw)
+
+        product = (by_item.get(item_id) if item_id else None) or by_name.get(name.lower())
+        if product is None:
+            product = ThroughputProduct(
+                tenant_id=tenant_id,
+                item_id=item_id,
+                name=name,
+                default_bag_size=None,
+                is_bulka_default="bulka" in name.lower(),
+                active=True,
+                notes="Imported from Operations Throughput.xlsx",
+            )
+            db.add(product)
+            created += 1
+            if item_id:
+                by_item[item_id] = product
+            by_name[name.lower()] = product
+        else:
+            if item_id and not product.item_id:
+                product.item_id = item_id
+            if name and product.name != name:
+                product.name = name
+            updated += 1
+
+    db.flush()
+    return (created, updated)
+
+
+def import_production_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
+    """Import the Production sheet. Returns (imported, skipped)."""
+    if PRODUCTION_SHEET not in workbook.sheetnames:
+        return (0, 0)
+
+    ws = workbook[PRODUCTION_SHEET]
+    # Header row is row 3 in the sheet (rows 1 and 2 are display banners).
+    products_by_name: dict[str, ThroughputProduct] = {
+        product.name.lower(): product
+        for product in db.scalars(
+            select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
+        ).all()
+    }
+
+    bag_size_seen: dict[int, list[float]] = {}
+    imported = 0
+    skipped = 0
+
+    for row in ws.iter_rows(min_row=4, values_only=True):
+        if not row or len(row) < 15:
+            skipped += 1
+            continue
+        production_date = _coerce_date(row[0])
+        product_name = _coerce_text(row[1])
+        if production_date is None or not product_name:
+            skipped += 1
+            continue
+
+        bag_size = _coerce_float(row[2])
+        scales = _coerce_bool(row[3])
+        label = _coerce_bool(row[4])
+        sealed = _coerce_bool(row[5])
+        pallet = _coerce_bool(row[6])
+        sample_box = _coerce_text(row[7])
+        tw1 = _coerce_float(row[8])
+        tw2 = _coerce_float(row[9])
+        tw3 = _coerce_float(row[10])
+        tw4 = _coerce_float(row[11])
+        tw5 = _coerce_float(row[12])
+        quantity = _coerce_float(row[13]) or 0.0
+        staff = normalise_staff_name(row[14])
+        notes = _coerce_text(row[15]) if len(row) > 15 else None
+
+        # Infer quantity_type: bulka-style rows have a blank or very large bag size.
+        if bag_size is None or bag_size >= _BULKA_BAG_SIZE_THRESHOLD or "bulka" in product_name.lower():
+            quantity_type = "kg"
+        else:
+            quantity_type = "bags"
+
+        product = products_by_name.get(product_name.lower())
+        if product is None:
+            product = ThroughputProduct(
+                tenant_id=tenant_id,
+                item_id=None,
+                name=product_name,
+                default_bag_size=bag_size,
+                is_bulka_default=_infer_bulka_default(product_name, bag_size),
+                active=True,
+                notes="Auto-created during Operations Throughput import",
+            )
+            db.add(product)
+            db.flush()
+            products_by_name[product_name.lower()] = product
+
+        if product.id is not None and bag_size is not None and bag_size > 0:
+            bag_size_seen.setdefault(product.id, []).append(bag_size)
+
+        calculated = calculate_kg(quantity, quantity_type, bag_size)
+        entry = ProductionThroughput(
+            tenant_id=tenant_id,
+            production_date=production_date,
+            product_id=product.id,
+            product_name_snapshot=product_name,
+            bag_size=bag_size,
+            scales_checked=scales,
+            label_correct=label,
+            bag_sealed=sealed,
+            pallet_good_condition=pallet,
+            sample_box_no=sample_box,
+            test_weight_1=tw1,
+            test_weight_2=tw2,
+            test_weight_3=tw3,
+            test_weight_4=tw4,
+            test_weight_5=tw5,
+            quantity=quantity,
+            quantity_type=quantity_type,
+            calculated_kg=calculated,
+            staff_name=staff,
+            notes=notes,
+            created_by="workbook-import",
+        )
+        db.add(entry)
+        imported += 1
+
+    # Backfill default_bag_size on products that don't have one but appear in entries.
+    for product_id, sizes in bag_size_seen.items():
+        product = db.get(ThroughputProduct, product_id)
+        if product and product.default_bag_size is None:
+            # Use the most common bag size seen.
+            common = max(set(sizes), key=sizes.count)
+            product.default_bag_size = common
+            if not product.is_bulka_default:
+                product.is_bulka_default = _infer_bulka_default(product.name, common)
+
+    db.flush()
+    return (imported, skipped)
+
+
+def import_workbook(db: Session, workbook_path: Path, tenant_id: str) -> dict:
+    workbook = load_workbook(workbook_path, data_only=True)
+    products_created, products_updated = import_names_sheet(db, workbook, tenant_id)
+    entries_imported, entries_skipped = import_production_sheet(db, workbook, tenant_id)
+    return {
+        "products_created": products_created,
+        "products_updated": products_updated,
+        "entries_imported": entries_imported,
+        "entries_skipped": entries_skipped,
+    }
+
+
+def workbook_candidates() -> Iterable[Path]:
+    repo_root = Path(__file__).resolve().parents[3]
+    candidates = [
+        repo_root / "Operations Throughput.xlsx",
+        repo_root.parent / "Operations Throughput.xlsx",
+        Path.cwd() / "Operations Throughput.xlsx",
+        Path("/srv/lean101-clients") / "Operations Throughput.xlsx",
+        Path("/app") / "Operations Throughput.xlsx",
+    ]
+    seen: set[str] = set()
+    ordered: list[Path] = []
+    for candidate in candidates:
+        key = str(candidate)
+        if key in seen:
+            continue
+        seen.add(key)
+        ordered.append(candidate)
+    return ordered
+
+
+def resolve_workbook_path() -> Path | None:
+    for candidate in workbook_candidates():
+        if candidate.exists():
+            return candidate
+    return None