tweaks
This commit is contained in:
@@ -0,0 +1,348 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from openpyxl import load_workbook
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.throughput import ProductionThroughput, ThroughputProduct
|
||||
|
||||
logger = logging.getLogger("data_entry_app.throughput")
|
||||
|
||||
PRODUCTION_SHEET = "Production"
|
||||
NAMES_SHEET = "Names"
|
||||
|
||||
# Anything at or above this kg/bag is treated as a bulka batch, not a per-bag count.
|
||||
_BULKA_BAG_SIZE_THRESHOLD = 100.0
|
||||
|
||||
|
||||
def normalise_staff_name(value: object) -> str | None:
    """Return a tidied staff name, or ``None`` when there is nothing usable.

    ``None`` and blank / whitespace-only values give ``None``.  Any other
    value is converted with ``str()``, trimmed, and has every run of internal
    whitespace collapsed to a single space.  Letter case is left exactly as
    it was entered.
    """
    if value is None:
        return None
    # split() with no arguments drops leading/trailing whitespace and treats
    # any run of whitespace as a single separator.
    words = str(value).split()
    return " ".join(words) if words else None
|
||||
|
||||
|
||||
def calculate_kg(quantity: float | None, quantity_type: str, bag_size: float | None) -> float:
    """Convert an entered quantity into kilograms.

    Args:
        quantity: The amount entered; ``None`` yields ``0.0``.
        quantity_type: ``"kg"`` means ``quantity`` is already a weight.  Any
            other value means ``quantity`` is a count that must be multiplied
            by ``bag_size``.
        bag_size: Kilograms per bag; only consulted for non-``"kg"`` types.

    Returns:
        The weight in kilograms, or ``0.0`` when it cannot be computed
        (missing quantity, or a bag count without a bag size).
    """
    if quantity is None:
        return 0.0
    if quantity_type != "kg":
        # A count of bags is meaningless without the weight of one bag.
        if bag_size is None:
            return 0.0
        return float(quantity) * float(bag_size)
    return float(quantity)
|
||||
|
||||
|
||||
def qa_passed(entry: ProductionThroughput) -> bool:
    """Return ``True`` only when all four QA check flags on ``entry`` are truthy.

    The flags are read in the order scales, label, seal, pallet, and
    evaluation stops at the first falsy one.
    """
    qa_flags = (
        "scales_checked",
        "label_correct",
        "bag_sealed",
        "pallet_good_condition",
    )
    return all(getattr(entry, flag) for flag in qa_flags)
|
||||
|
||||
|
||||
def serialize_entry(entry: ProductionThroughput) -> dict:
    """Build a plain ``dict`` view of a throughput entry.

    Each key mirrors the attribute of the same name on ``entry``, except
    ``"qa_passed"``, which is derived by calling :func:`qa_passed`.  Values are
    copied as-is (no conversion of dates or numbers happens here).  Key order
    is: stored fields up to ``notes``, then ``qa_passed``, then the audit
    fields.
    """
    stored_fields = (
        "id",
        "tenant_id",
        "production_date",
        "product_id",
        "product_name_snapshot",
        "bag_size",
        "scales_checked",
        "label_correct",
        "bag_sealed",
        "pallet_good_condition",
        "sample_box_no",
        "test_weight_1",
        "test_weight_2",
        "test_weight_3",
        "test_weight_4",
        "test_weight_5",
        "quantity",
        "quantity_type",
        "calculated_kg",
        "staff_name",
        "notes",
    )
    audit_fields = ("created_by", "created_at", "updated_at")

    payload = {field: getattr(entry, field) for field in stored_fields}
    # Derived value; sits between the stored fields and the audit fields.
    payload["qa_passed"] = qa_passed(entry)
    for field in audit_fields:
        payload[field] = getattr(entry, field)
    return payload
|
||||
|
||||
|
||||
def _coerce_bool(value: object) -> bool:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if value is None:
|
||||
return True
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
text = str(value).strip().lower()
|
||||
if text in {"yes", "y", "true", "1", "pass", "ok", "x", "checked"}:
|
||||
return True
|
||||
if text in {"no", "n", "false", "0", "fail"}:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _coerce_float(value: object) -> float | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
if isinstance(value, bool):
|
||||
return float(value)
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
text = str(value).strip().replace(",", "")
|
||||
if not text:
|
||||
return None
|
||||
try:
|
||||
return float(text)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _coerce_text(value: object) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
if not text or text.lower() in {"#value!", "#n/a", "n/a"}:
|
||||
return None
|
||||
return text
|
||||
|
||||
|
||||
def _coerce_date(value: object) -> date | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value.date()
|
||||
if isinstance(value, date):
|
||||
return value
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
return None
|
||||
for fmt in ("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y"):
|
||||
try:
|
||||
return datetime.strptime(text, fmt).date()
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _infer_bulka_default(name: str, bag_size: float | None) -> bool:
|
||||
lowered = name.lower()
|
||||
if "bulka" in lowered:
|
||||
return True
|
||||
if bag_size is None:
|
||||
return False
|
||||
return bag_size >= _BULKA_BAG_SIZE_THRESHOLD
|
||||
|
||||
|
||||
def import_names_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
    """Upsert the tenant's product master from the Names sheet.

    Rows are read from row 2 onward (row 1 is skipped).  Column A is the
    product name and column B an optional item id; rows without a usable name
    are ignored.  A row is matched to an existing product by item id first and
    by case-insensitive name second.  Unmatched rows create a new product;
    matched rows fill in a missing item id and adopt the sheet's spelling of
    the name.

    Args:
        db: Session used to load the tenant's products and to add new ones.
            The session is flushed before returning, not committed.
        workbook: Workbook object exposing ``sheetnames`` and item access by
            sheet name.
        tenant_id: Tenant whose products are read and written.

    Returns:
        ``(created, updated)``.  ``updated`` counts every sheet row that
        matched an existing product, whether or not any field changed.
        ``(0, 0)`` when the workbook has no Names sheet.
    """
    if NAMES_SHEET not in workbook.sheetnames:
        return (0, 0)

    ws = workbook[NAMES_SHEET]

    # Lookup indexes over the tenant's products, kept current while importing
    # so that later rows can match products touched by earlier rows.
    by_item: dict[str, ThroughputProduct] = {}
    by_name: dict[str, ThroughputProduct] = {}
    for product in db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
    ).all():
        if product.item_id:
            by_item[str(product.item_id)] = product
        by_name[product.name.lower()] = product

    created = 0
    updated = 0
    for row in ws.iter_rows(min_row=2, values_only=True):
        if not row:
            continue
        name = _coerce_text(row[0])
        if not name:
            continue
        name_key = name.lower()

        item_id_raw = row[1] if len(row) > 1 else None
        if isinstance(item_id_raw, float) and item_id_raw.is_integer():
            # A whole-number float such as 1234.0 should read "1234", not "1234.0".
            item_id = str(int(item_id_raw))
        else:
            item_id = _coerce_text(item_id_raw)

        product = (by_item.get(item_id) if item_id else None) or by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=tenant_id,
                item_id=item_id,
                name=name,
                default_bag_size=None,
                is_bulka_default="bulka" in name_key,
                active=True,
                notes="Imported from Operations Throughput.xlsx",
            )
            db.add(product)
            created += 1
            if item_id:
                by_item[item_id] = product
            by_name[name_key] = product
        else:
            if item_id and not product.item_id:
                product.item_id = item_id
                # Index the new id so a later row carrying it matches this
                # product instead of creating a duplicate.
                by_item[item_id] = product
            if product.name != name:
                product.name = name
                # Make the new name findable, without displacing a different
                # product that already owns that name.
                by_name.setdefault(name_key, product)
            updated += 1

    db.flush()
    return (created, updated)
|
||||
|
||||
|
||||
def import_production_sheet(db: Session, workbook, tenant_id: str) -> tuple[int, int]:
    """Import throughput entries from the Production sheet.

    Data rows start at row 4.  Columns are read by position: 0 production
    date, 1 product name, 2 bag size, 3-6 the four QA checks (scales, label,
    seal, pallet), 7 sample box number, 8-12 test weights 1-5, 13 quantity,
    14 staff name, and 15 notes when present.  Rows with fewer than 15 columns,
    no parseable date, or no product name are skipped.

    Products are matched by case-insensitive name and created on the fly when
    missing.  After all rows are read, products that still have no
    ``default_bag_size`` receive the bag size seen most often in their rows.

    Args:
        db: Session used to look up products and add entries.  It is flushed
            (not committed) when a product is auto-created and once at the end.
        workbook: Workbook object exposing ``sheetnames`` and item access by
            sheet name.
        tenant_id: Tenant the products and entries belong to.

    Returns:
        ``(imported, skipped)`` row counts; ``(0, 0)`` when the workbook has
        no Production sheet.
    """
    if PRODUCTION_SHEET not in workbook.sheetnames:
        return (0, 0)

    ws = workbook[PRODUCTION_SHEET]
    products_by_name: dict[str, ThroughputProduct] = {
        product.name.lower(): product
        for product in db.scalars(
            select(ThroughputProduct).where(ThroughputProduct.tenant_id == tenant_id)
        ).all()
    }

    # product id -> {bag size -> number of rows it appeared on}.  Tallying
    # during the pass keeps the "most common size" lookup linear.
    bag_size_counts: dict[int, dict[float, int]] = {}
    imported = 0
    skipped = 0

    # Header row is row 3 in the sheet (rows 1 and 2 are display banners),
    # so data starts at row 4.
    for row in ws.iter_rows(min_row=4, values_only=True):
        if not row or len(row) < 15:
            skipped += 1
            continue
        production_date = _coerce_date(row[0])
        product_name = _coerce_text(row[1])
        if production_date is None or not product_name:
            skipped += 1
            continue
        name_key = product_name.lower()

        bag_size = _coerce_float(row[2])
        scales = _coerce_bool(row[3])
        label = _coerce_bool(row[4])
        sealed = _coerce_bool(row[5])
        pallet = _coerce_bool(row[6])
        sample_box = _coerce_text(row[7])
        tw1 = _coerce_float(row[8])
        tw2 = _coerce_float(row[9])
        tw3 = _coerce_float(row[10])
        tw4 = _coerce_float(row[11])
        tw5 = _coerce_float(row[12])
        quantity = _coerce_float(row[13]) or 0.0
        staff = normalise_staff_name(row[14])
        notes = _coerce_text(row[15]) if len(row) > 15 else None

        # Infer quantity_type: bulka-style rows have a blank or very large bag
        # size (or say "bulka" in the name), and their quantity is already kg.
        if bag_size is None or bag_size >= _BULKA_BAG_SIZE_THRESHOLD or "bulka" in name_key:
            quantity_type = "kg"
        else:
            quantity_type = "bags"

        product = products_by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=tenant_id,
                item_id=None,
                name=product_name,
                default_bag_size=bag_size,
                is_bulka_default=_infer_bulka_default(product_name, bag_size),
                active=True,
                notes="Auto-created during Operations Throughput import",
            )
            db.add(product)
            # Flush now so product.id is available for the entry below.
            db.flush()
            products_by_name[name_key] = product

        if product.id is not None and bag_size is not None and bag_size > 0:
            counts = bag_size_counts.setdefault(product.id, {})
            counts[bag_size] = counts.get(bag_size, 0) + 1

        calculated = calculate_kg(quantity, quantity_type, bag_size)
        entry = ProductionThroughput(
            tenant_id=tenant_id,
            production_date=production_date,
            product_id=product.id,
            product_name_snapshot=product_name,
            bag_size=bag_size,
            scales_checked=scales,
            label_correct=label,
            bag_sealed=sealed,
            pallet_good_condition=pallet,
            sample_box_no=sample_box,
            test_weight_1=tw1,
            test_weight_2=tw2,
            test_weight_3=tw3,
            test_weight_4=tw4,
            test_weight_5=tw5,
            quantity=quantity,
            quantity_type=quantity_type,
            calculated_kg=calculated,
            staff_name=staff,
            notes=notes,
            created_by="workbook-import",
        )
        db.add(entry)
        imported += 1

    # Backfill default_bag_size on products that don't have one but appear in entries.
    for product_id, counts in bag_size_counts.items():
        product = db.get(ThroughputProduct, product_id)
        if not product or product.default_bag_size is not None:
            continue
        # Most common bag size seen; on a tie the size encountered first wins
        # (max() returns the first maximal key and dicts keep insertion order).
        common = max(counts, key=counts.get)
        product.default_bag_size = common
        if not product.is_bulka_default:
            product.is_bulka_default = _infer_bulka_default(product.name, common)

    db.flush()
    return (imported, skipped)
|
||||
|
||||
|
||||
def import_workbook(db: Session, workbook_path: Path, tenant_id: str) -> dict:
    """Load the workbook at ``workbook_path`` and import both of its sheets.

    The Names sheet is imported first, then the Production sheet.  The
    workbook is opened with ``data_only=True``, which in openpyxl reads the
    cached values of formula cells and not the formula text.

    Returns:
        A dict with the counts ``products_created``, ``products_updated``,
        ``entries_imported`` and ``entries_skipped``.
    """
    book = load_workbook(workbook_path, data_only=True)
    product_counts = import_names_sheet(db, book, tenant_id)
    entry_counts = import_production_sheet(db, book, tenant_id)
    summary_keys = (
        "products_created",
        "products_updated",
        "entries_imported",
        "entries_skipped",
    )
    return dict(zip(summary_keys, (*product_counts, *entry_counts)))
|
||||
|
||||
|
||||
def workbook_candidates() -> Iterable[Path]:
    """List the locations where the throughput workbook is looked for.

    The candidates, in priority order, are the directory three levels above
    this file's directory, that directory's parent, the current working
    directory, ``/srv/lean101-clients`` and ``/app``.  Paths whose string form
    repeats an earlier candidate are dropped.  Existence is not checked here.
    """
    filename = "Operations Throughput.xlsx"
    repo_root = Path(__file__).resolve().parents[3]
    search_dirs = (
        repo_root,
        repo_root.parent,
        Path.cwd(),
        Path("/srv/lean101-clients"),
        Path("/app"),
    )
    # Keyed by string form; setdefault keeps the first occurrence and dicts
    # preserve insertion order, so priority order survives de-duplication.
    unique: dict[str, Path] = {}
    for directory in search_dirs:
        candidate = directory / filename
        unique.setdefault(str(candidate), candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def resolve_workbook_path() -> Path | None:
    """Return the first workbook candidate that exists on disk, else ``None``.

    Candidates come from :func:`workbook_candidates` and are checked in the
    order it yields them.
    """
    return next(
        (candidate for candidate in workbook_candidates() if candidate.exists()),
        None,
    )
|
||||
Reference in New Issue
Block a user