Files
data-entry-app/backend/app/services/throughput_service.py
T
2026-05-31 20:19:44 +12:00

349 lines
11 KiB
Python

from __future__ import annotations
import logging
from datetime import date, datetime
from pathlib import Path
from typing import Iterable
from openpyxl import load_workbook
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.models.throughput import ProductionThroughput, ThroughputProduct
# Module-level logger registered under the name "data_entry_app.throughput".
logger = logging.getLogger("data_entry_app.throughput")
# Worksheet names that the import functions in this module look up in the workbook.
PRODUCTION_SHEET = "Production"
NAMES_SHEET = "Names"
# Anything at or above this kg/bag is treated as a bulka batch, not a per-bag count.
_BULKA_BAG_SIZE_THRESHOLD = 100.0
def normalise_staff_name(value: object) -> str | None:
    """Return a tidied staff name, or ``None`` when there is nothing to keep.

    The value is stringified, leading/trailing whitespace is dropped and every
    run of internal whitespace is collapsed to a single space.  Letter case is
    left exactly as supplied.
    """
    if value is None:
        return None
    # split() with no separator both trims the ends and collapses inner runs.
    collapsed = " ".join(str(value).split())
    return collapsed or None
def calculate_kg(quantity: float | None, quantity_type: str, bag_size: float | None) -> float:
    """Convert a recorded quantity into kilograms.

    A quantity whose type is ``"kg"`` is returned unchanged (as a float).  Any
    other quantity type is treated as a count that is multiplied by
    ``bag_size``.  A missing quantity, or a missing bag size for a non-kg
    quantity, yields ``0.0``.
    """
    if quantity is None:
        return 0.0
    if quantity_type != "kg":
        # Count-based entry: without a bag size there is nothing to multiply by.
        if bag_size is None:
            return 0.0
        return float(quantity) * float(bag_size)
    return float(quantity)
def qa_passed(entry: ProductionThroughput) -> bool:
    """Return ``True`` only when all four QA check flags on ``entry`` are truthy."""
    qa_flags = (
        "scales_checked",
        "label_correct",
        "bag_sealed",
        "pallet_good_condition",
    )
    # all() over a generator stops at the first falsy flag, reading them in this order.
    return all(getattr(entry, flag) for flag in qa_flags)
def serialize_entry(entry: ProductionThroughput) -> dict:
    """Flatten a throughput entry into a plain dict.

    Every listed attribute is copied across under its own name, and a derived
    ``"qa_passed"`` key (computed by :func:`qa_passed`) is inserted between
    ``"notes"`` and the audit fields.
    """
    stored_fields = (
        "id",
        "tenant_id",
        "production_date",
        "product_id",
        "product_name_snapshot",
        "bag_size",
        "scales_checked",
        "label_correct",
        "bag_sealed",
        "pallet_good_condition",
        "sample_box_no",
        "test_weight_1",
        "test_weight_2",
        "test_weight_3",
        "test_weight_4",
        "test_weight_5",
        "quantity",
        "quantity_type",
        "calculated_kg",
        "staff_name",
        "notes",
    )
    audit_fields = ("created_by", "created_at", "updated_at")

    payload = {field: getattr(entry, field) for field in stored_fields}
    payload["qa_passed"] = qa_passed(entry)
    for field in audit_fields:
        payload[field] = getattr(entry, field)
    return payload
def _coerce_bool(value: object) -> bool:
if isinstance(value, bool):
return value
if value is None:
return True
if isinstance(value, (int, float)):
return bool(value)
text = str(value).strip().lower()
if text in {"yes", "y", "true", "1", "pass", "ok", "x", "checked"}:
return True
if text in {"no", "n", "false", "0", "fail"}:
return False
return True
def _coerce_float(value: object) -> float | None:
if value is None or value == "":
return None
if isinstance(value, bool):
return float(value)
if isinstance(value, (int, float)):
return float(value)
text = str(value).strip().replace(",", "")
if not text:
return None
try:
return float(text)
except ValueError:
return None
def _coerce_text(value: object) -> str | None:
if value is None:
return None
text = str(value).strip()
if not text or text.lower() in {"#value!", "#n/a", "n/a"}:
return None
return text
def _coerce_date(value: object) -> date | None:
if value is None:
return None
if isinstance(value, datetime):
return value.date()
if isinstance(value, date):
return value
text = str(value).strip()
if not text:
return None
for fmt in ("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y"):
try:
return datetime.strptime(text, fmt).date()
except ValueError:
continue
return None
def _infer_bulka_default(name: str, bag_size: float | None) -> bool:
lowered = name.lower()
if "bulka" in lowered:
return True
if bag_size is None:
return False
return bag_size >= _BULKA_BAG_SIZE_THRESHOLD
def import_names_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
    """Upsert product master from the Names sheet. Returns (created, updated).

    Rows are read from row 2 onwards; column A is the product name and column
    B (when present) is the item id.  A row is matched to an existing product
    of ``tenant_id`` first by item id, then by case-insensitive name.
    Unmatched rows create a new product; matched rows have a missing item id
    backfilled and their name brought in line with the sheet.

    ``updated`` counts every row that matched an existing product, including
    rows that changed nothing.  Returns ``(0, 0)`` when the workbook has no
    Names sheet.  Pending changes are flushed, not committed.
    """
    if NAMES_SHEET not in workbook.sheetnames:
        return (0, 0)
    ws = workbook[NAMES_SHEET]

    # Lookup indexes over the tenant's current products.
    by_item: dict[str, ThroughputProduct] = {}
    by_name: dict[str, ThroughputProduct] = {}
    for product in db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
    ).all():
        if product.item_id:
            by_item[str(product.item_id)] = product
        by_name[product.name.lower()] = product

    created = 0
    updated = 0
    for row in ws.iter_rows(min_row=2, values_only=True):
        if not row:
            continue
        name = _coerce_text(row[0])
        if not name:
            continue

        item_id_raw = row[1] if len(row) > 1 else None
        item_id = None
        if item_id_raw is not None:
            if isinstance(item_id_raw, float) and item_id_raw.is_integer():
                # Whole-number floats such as 1001.0 become "1001", not "1001.0".
                item_id = str(int(item_id_raw))
            else:
                item_id = _coerce_text(item_id_raw)

        name_key = name.lower()
        product = (by_item.get(item_id) if item_id else None) or by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=tenant_id,
                item_id=item_id,
                name=name,
                default_bag_size=None,
                is_bulka_default="bulka" in name_key,
                active=True,
                notes="Imported from Operations Throughput.xlsx",
            )
            db.add(product)
            created += 1
            if item_id:
                by_item[item_id] = product
            by_name[name_key] = product
        else:
            if item_id and not product.item_id:
                product.item_id = item_id
                # Index the backfilled id so a later row carrying the same id
                # resolves to this product instead of creating a duplicate.
                by_item[item_id] = product
            if product.name != name:
                product.name = name
                # Index the new name as well; setdefault leaves any other
                # product that already owns this name in place.
                by_name.setdefault(name_key, product)
            updated += 1

    db.flush()
    return (created, updated)
def import_production_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
    """Import the Production sheet. Returns (imported, skipped).

    Each data row becomes one ``ProductionThroughput`` entry.  Rows with fewer
    than 15 cells, no parseable production date, or no product name are
    counted as skipped.  Products that are not yet known for ``tenant_id``
    (matched by case-insensitive name) are created on the fly.  After all rows
    are processed, products without a ``default_bag_size`` receive the bag
    size seen most often in their rows.  Returns ``(0, 0)`` when the workbook
    has no Production sheet.  Pending changes are flushed, not committed.
    """
    if PRODUCTION_SHEET not in workbook.sheetnames:
        return (0, 0)
    sheet = workbook[PRODUCTION_SHEET]

    products_by_name: dict[str, ThroughputProduct] = {}
    for known in db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
    ).all():
        products_by_name[known.name.lower()] = known

    sizes_by_product: dict[int, list[float]] = {}
    imported = 0
    skipped = 0

    # Rows 1 and 2 are display banners and row 3 is the header, so data starts at row 4.
    for cells in sheet.iter_rows(min_row=4, values_only=True):
        if not cells or len(cells) < 15:
            skipped += 1
            continue
        production_date = _coerce_date(cells[0])
        product_name = _coerce_text(cells[1])
        if production_date is None or not product_name:
            skipped += 1
            continue

        bag_size = _coerce_float(cells[2])
        # Columns D-G hold the four QA flags, column H the sample box,
        # columns I-M the five test weights.
        scales, label, sealed, pallet = (_coerce_bool(cell) for cell in cells[3:7])
        sample_box = _coerce_text(cells[7])
        tw1, tw2, tw3, tw4, tw5 = (_coerce_float(cell) for cell in cells[8:13])
        quantity = _coerce_float(cells[13]) or 0.0
        staff = normalise_staff_name(cells[14])
        notes = _coerce_text(cells[15]) if len(cells) > 15 else None

        # Infer quantity_type: bulka-style rows have a blank or very large bag
        # size, or "bulka" in the product name; everything else is a bag count.
        weighed_in_kg = bag_size is None or _infer_bulka_default(product_name, bag_size)
        quantity_type = "kg" if weighed_in_kg else "bags"

        name_key = product_name.lower()
        product = products_by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=tenant_id,
                item_id=None,
                name=product_name,
                default_bag_size=bag_size,
                is_bulka_default=_infer_bulka_default(product_name, bag_size),
                active=True,
                notes="Auto-created during Operations Throughput import",
            )
            db.add(product)
            # Flush right away; product.id is read just below (presumably it
            # is assigned by the database on flush -- confirm against the model).
            db.flush()
            products_by_name[name_key] = product

        if product.id is not None and bag_size is not None and bag_size > 0:
            sizes_by_product.setdefault(product.id, []).append(bag_size)

        db.add(
            ProductionThroughput(
                tenant_id=tenant_id,
                production_date=production_date,
                product_id=product.id,
                product_name_snapshot=product_name,
                bag_size=bag_size,
                scales_checked=scales,
                label_correct=label,
                bag_sealed=sealed,
                pallet_good_condition=pallet,
                sample_box_no=sample_box,
                test_weight_1=tw1,
                test_weight_2=tw2,
                test_weight_3=tw3,
                test_weight_4=tw4,
                test_weight_5=tw5,
                quantity=quantity,
                quantity_type=quantity_type,
                calculated_kg=calculate_kg(quantity, quantity_type, bag_size),
                staff_name=staff,
                notes=notes,
                created_by="workbook-import",
            )
        )
        imported += 1

    # Backfill default_bag_size on products that don't have one but appear in entries.
    for product_id, sizes in sizes_by_product.items():
        product = db.get(ThroughputProduct, product_id)
        if not product or product.default_bag_size is not None:
            continue
        # Use the most common bag size seen.
        common = max(set(sizes), key=sizes.count)
        product.default_bag_size = common
        if not product.is_bulka_default:
            product.is_bulka_default = _infer_bulka_default(product.name, common)

    db.flush()
    return (imported, skipped)
def import_workbook(db: Session, workbook_path: Path, tenant_id: str) -> dict:
    """Import both sheets of the throughput workbook and return a summary.

    The workbook is opened with ``data_only=True`` and the Names sheet is
    imported before the Production sheet.  The returned dict has the keys
    ``products_created``, ``products_updated``, ``entries_imported`` and
    ``entries_skipped``.
    """
    workbook = load_workbook(workbook_path, data_only=True)
    product_counts = import_names_sheet(db, workbook, tenant_id)
    entry_counts = import_production_sheet(db, workbook, tenant_id)
    summary_keys = (
        "products_created",
        "products_updated",
        "entries_imported",
        "entries_skipped",
    )
    return dict(zip(summary_keys, (*product_counts, *entry_counts)))
def workbook_candidates() -> Iterable[Path]:
    """Return the locations where "Operations Throughput.xlsx" is looked for.

    Candidates, in priority order: the directory three levels above this
    module's directory (named ``repo_root`` below) and that directory's parent,
    the current working directory, ``/srv/lean101-clients`` and ``/app``.
    Duplicate paths are dropped, keeping the first occurrence.  Nothing is
    checked for existence here.

    When this module sits too close to the filesystem root for the ancestor
    lookup to succeed, the two ancestor-based candidates are simply omitted
    rather than raising ``IndexError``.
    """
    filename = "Operations Throughput.xlsx"

    search_dirs: list[Path] = []
    ancestors = Path(__file__).resolve().parents
    if len(ancestors) > 3:
        repo_root = ancestors[3]
        search_dirs.extend((repo_root, repo_root.parent))
    search_dirs.extend((Path.cwd(), Path("/srv/lean101-clients"), Path("/app")))

    # Dicts preserve insertion order, so setdefault keeps the first occurrence
    # of every distinct path string.
    unique: dict[str, Path] = {}
    for directory in search_dirs:
        candidate = directory / filename
        unique.setdefault(str(candidate), candidate)
    return list(unique.values())
def resolve_workbook_path() -> Path | None:
    """Return the first candidate workbook path that exists, or ``None`` if none do."""
    existing = (path for path in workbook_candidates() if path.exists())
    return next(existing, None)