This commit is contained in:
2026-05-31 20:19:44 +12:00
parent 2f2466ecac
commit 84792c0947
59 changed files with 5412 additions and 898 deletions
+318 -13
View File
@@ -9,21 +9,26 @@ import re
from openpyxl import load_workbook
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from app.db.session import Base, SessionLocal, engine
from app.models.assumption import FreightCostRule, PackagingCostRule, ProcessCostRule
from app.models.client_access import ClientAccessAuditEvent, ClientAccount, ClientFeatureAccess, ClientUser, ClientUserModulePermission
from app.models.mix import Mix, MixIngredient
from app.models.product import Product
from app.models.product import Product, ProductIngredient
from app.models.raw_material import RawMaterial, RawMaterialPriceVersion
from app.models.throughput import ProductionThroughput, ThroughputProduct
from app.seed_access import seed_access
from app.services.client_access_service import MODULE_CATALOG, default_access_level_for_role
from app.services.throughput_service import import_workbook as import_throughput_workbook
from app.services.throughput_service import resolve_workbook_path as resolve_throughput_workbook_path
TENANT_ID = "hunter-premium-produce"
WORKBOOK_EFFECTIVE_DATE = date(2025, 9, 1)
WORKBOOK_SENTINEL_ITEM_ID = "404266"
WORKBOOK_FILENAME = "Input Cost Spreadsheet(1).xlsx"
WORKBOOK_FILENAME = "1.xlsx"
LEGACY_WORKBOOK_FILENAME = "Input Cost Spreadsheet(1).xlsx"
logger = logging.getLogger("data_entry_app.seed")
HIDDEN_PRODUCT_CLIENTS = frozenset(
{
@@ -46,11 +51,18 @@ def _workbook_candidates() -> list[Path]:
candidates = [
Path(env_path) if env_path else None,
repo_root / "input_data" / WORKBOOK_FILENAME,
cwd / "input_data" / WORKBOOK_FILENAME,
Path("/srv/lean101-clients") / WORKBOOK_FILENAME,
repo_root / WORKBOOK_FILENAME,
cwd / WORKBOOK_FILENAME,
Path("/app") / WORKBOOK_FILENAME,
Path("/") / WORKBOOK_FILENAME,
repo_root / LEGACY_WORKBOOK_FILENAME,
cwd / LEGACY_WORKBOOK_FILENAME,
Path("/srv/lean101-clients") / LEGACY_WORKBOOK_FILENAME,
Path("/app") / LEGACY_WORKBOOK_FILENAME,
Path("/") / LEGACY_WORKBOOK_FILENAME,
]
ordered: list[Path] = []
@@ -73,9 +85,6 @@ def _resolve_workbook_path() -> Path:
return _workbook_candidates()[0]
WORKBOOK_PATH = _resolve_workbook_path()
def _text(value) -> str | None:
if value is None:
return None
@@ -178,7 +187,21 @@ def _build_process_key(label, grading_cost: float, bagging_cost: float, cracking
return f"{base}_g{int(round(grading_cost * 1000))}_b{int(round(bagging_cost * 1000))}_c{int(round(cracking_cost * 1000))}"
def _load_workbook():
def _load_workbook(*required_sheets: str):
for candidate in _workbook_candidates():
if not candidate.exists():
continue
workbook = load_workbook(candidate, data_only=True)
if all(sheet_name in workbook.sheetnames for sheet_name in required_sheets):
return workbook
if required_sheets:
raise FileNotFoundError(
"No workbook with required sheets found. "
f"Required sheets: {', '.join(required_sheets)}. "
f"Checked: {', '.join(str(path) for path in _workbook_candidates())}"
)
workbook_path = _resolve_workbook_path()
if not workbook_path.exists():
raise FileNotFoundError(
@@ -258,6 +281,44 @@ def _read_mix_rows(workbook) -> dict[tuple[str, str], dict]:
return best_rows
def _read_product_ingredient_rows(workbook) -> dict[tuple[str, str], dict]:
    """Read per-client, per-product ingredient quantities from the formula sheet.

    The ``mix_quantites_per_client_per_pr`` worksheet is read positionally:
    column 0 is the client name, column 1 the product name, column 2 the total
    kilograms, and every column from index 3 onwards is one ingredient whose
    header cell holds the raw material name.

    Args:
        workbook: An openpyxl workbook containing the formula worksheet.

    Returns:
        A dict keyed by ``(client_name, product_name)``. Each value holds
        ``client_name``, ``product_name``, ``total_kg`` and ``ingredients``, a
        list of dicts with ``raw_material_name``, ``quantity_kg`` and
        ``sort_order``. Rows lacking a client, a product, or any positive
        quantity are left out; a later row with the same key replaces an
        earlier one.

    Raises:
        KeyError: If the workbook has no ``mix_quantites_per_client_per_pr`` sheet.
    """
    worksheet = workbook["mix_quantites_per_client_per_pr"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True))

    # Keep every ingredient name paired with its own column index. Filtering
    # out blank header cells and then slicing the data row by count would move
    # each quantity after a blank column onto the wrong ingredient.
    ingredient_columns: list[tuple[int, str]] = []
    for column_index, header_value in enumerate(header_row[3:], start=3):
        ingredient_name = _text(header_value)
        if ingredient_name:
            ingredient_columns.append((column_index, ingredient_name))

    rows: dict[tuple[str, str], dict] = {}
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        product_name = _text(row[1])
        if not client_name or not product_name:
            continue

        ingredients = []
        # sort_order counts named ingredient columns from 1, left to right.
        for sort_order, (column_index, ingredient_name) in enumerate(ingredient_columns, start=1):
            if column_index >= len(row):
                # Columns are in ascending order, so nothing further can match.
                break
            numeric_quantity = _number(row[column_index])
            if numeric_quantity and numeric_quantity > 0:
                ingredients.append(
                    {
                        "raw_material_name": ingredient_name,
                        "quantity_kg": numeric_quantity,
                        "sort_order": sort_order,
                    }
                )
        if not ingredients:
            continue

        # Fall back to the summed quantities when the total cell is empty or zero.
        total_kg = _number(row[2]) or round(sum(item["quantity_kg"] for item in ingredients), 4)
        rows[(client_name, product_name)] = {
            "client_name": client_name,
            "product_name": product_name,
            "total_kg": total_kg,
            "ingredients": ingredients,
        }
    return rows
def _read_product_rows(workbook) -> list[dict]:
worksheet = workbook["Product Cost - Price"]
raw_rows: list[dict] = []
@@ -606,6 +667,184 @@ def _upsert_products(db, products: list[dict], mix_lookup: dict[tuple[str, str],
product.notes = "Seeded from Input Cost Spreadsheet(1).xlsx"
def _upsert_product_ingredients(
    db,
    *,
    product_rows: list[dict],
    product_ingredient_rows: dict[tuple[str, str], dict],
    raw_material_map: dict[str, RawMaterial],
) -> None:
    """Sync ``ProductIngredient`` rows with the formulas read from the workbook.

    Each formula key ``(client_name, name)`` is matched against every product
    of ``TENANT_ID`` by the product's own name and, when the product has a mix,
    by the mix name. For each matched product the ingredients are created or
    updated from the formula, and existing ingredients whose raw material is
    not in the formula are deleted. Nothing is flushed or committed here.

    Args:
        db: SQLAlchemy session used for the queries and pending changes.
        product_rows: Accepted for call-site compatibility; not read here.
        product_ingredient_rows: Formulas keyed by ``(client_name, name)``,
            each with an ``ingredients`` list of dicts carrying
            ``raw_material_name``, ``quantity_kg`` and ``sort_order``.
        raw_material_map: Raw materials keyed by name. Ingredients whose name
            is missing from this map are ignored.
    """
    products = db.scalars(
        select(Product).where(Product.tenant_id == TENANT_ID).options(selectinload(Product.mix))
    ).all()

    products_by_formula_key: dict[tuple[str, str], list[Product]] = {}
    for product in products:
        candidate_keys = {(product.client_name, product.name)}
        if product.mix is not None:
            candidate_keys.add((product.client_name, product.mix.name))
        for key in candidate_keys:
            products_by_formula_key.setdefault(key, []).append(product)

    for key, formula in product_ingredient_rows.items():
        for product in products_by_formula_key.get(key, []):
            existing_ingredients = {
                ingredient.raw_material_id: ingredient
                for ingredient in db.scalars(
                    select(ProductIngredient).where(ProductIngredient.product_id == product.id)
                ).all()
            }
            desired_ids: set[int] = set()

            for row in formula["ingredients"]:
                raw_material = raw_material_map.get(row["raw_material_name"])
                if raw_material is None:
                    continue
                desired_ids.add(raw_material.id)
                ingredient = existing_ingredients.get(raw_material.id)
                if ingredient is None:
                    ingredient = ProductIngredient(
                        tenant_id=TENANT_ID,
                        product_id=product.id,
                        raw_material_id=raw_material.id,
                        quantity_kg=row["quantity_kg"],
                        sort_order=row["sort_order"],
                    )
                    db.add(ingredient)
                    # Record the new row so a formula that names the same raw
                    # material twice updates it instead of inserting a second
                    # row, matching what happens for an already stored row.
                    existing_ingredients[raw_material.id] = ingredient
                else:
                    ingredient.quantity_kg = row["quantity_kg"]
                    ingredient.sort_order = row["sort_order"]

            # Remove ingredients the formula no longer lists.
            for raw_material_id, ingredient in existing_ingredients.items():
                if raw_material_id not in desired_ids:
                    db.delete(ingredient)
def _infer_throughput_bag_size(product: Product) -> float | None:
    """Derive a default bag size in kilograms from the product's unit of measure.

    Returns ``None`` for products whose ``sale_type`` is ``"bulka"`` and for
    units that carry no recognisable size. A number followed by ``kg`` anywhere
    in the unit gives that number; the bare units ``kg`` and ``tonne`` give
    ``1.0`` and ``1000.0``. Matching ignores case and surrounding whitespace.
    """
    if product.sale_type == "bulka":
        return None

    normalized_unit = (product.unit_of_measure or "").strip().lower()
    sized = re.search(r"(\d+(?:\.\d+)?)\s*kg", normalized_unit)
    if sized is not None:
        return float(sized.group(1))

    # Units without an embedded number map to a fixed size, or to None.
    return {"kg": 1.0, "tonne": 1000.0}.get(normalized_unit)
def _infer_throughput_bulka_default(product: Product) -> bool:
    """Return whether the throughput product should default to bulka.

    True when ``sale_type`` is exactly ``"bulka"``, or when the text "bulka"
    occurs (ignoring case) in the product name or the unit of measure.
    """
    if product.sale_type == "bulka":
        return True
    # A missing name is treated as empty text, the same way the unit is,
    # instead of raising AttributeError on None.
    name = (product.name or "").lower()
    unit = (product.unit_of_measure or "").lower()
    return "bulka" in name or "bulka" in unit
def seed_throughput_products_from_costing(db) -> dict[str, int]:
    """Mirror costing products into the throughput product dropdown.

    Every costing ``Product`` of ``TENANT_ID`` is matched to a
    ``ThroughputProduct`` of the same tenant, first by ``item_id`` and then by
    case-insensitive, stripped name. Unmatched products are created. Matched
    ones have their item id, name, default bag size, bulka default, active
    flag and stock-item flag brought in line with the costing product.

    A costing product is counted as skipped when its name is blank, when its
    item id was already handled in this run, or when it has no item id and its
    name was already handled.

    Args:
        db: SQLAlchemy session. It is flushed, not committed, before returning
            (no flush happens when there are no costing products).

    Returns:
        Counts under the keys ``created``, ``updated`` and ``skipped``.
    """
    costing_products = db.scalars(
        select(Product)
        .where(Product.tenant_id == TENANT_ID)
        .order_by(Product.name, Product.id)
    ).all()
    if not costing_products:
        return {"created": 0, "updated": 0, "skipped": 0}

    throughput_products = db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == TENANT_ID)
    ).all()
    # Lookup indexes, kept current as rows are created or re-keyed below.
    by_item = {
        throughput_product.item_id: throughput_product
        for throughput_product in throughput_products
        if throughput_product.item_id
    }
    by_name = {
        throughput_product.name.strip().lower(): throughput_product
        for throughput_product in throughput_products
        if throughput_product.name
    }

    created = 0
    updated = 0
    skipped = 0
    seen_item_ids: set[str] = set()
    seen_names: set[str] = set()

    for costing_product in costing_products:
        name = (costing_product.name or "").strip()
        if not name:
            skipped += 1
            continue

        item_id = (costing_product.item_id or "").strip() or None
        name_key = name.lower()
        if item_id and item_id in seen_item_ids:
            skipped += 1
            continue
        if not item_id and name_key in seen_names:
            skipped += 1
            continue
        if item_id:
            seen_item_ids.add(item_id)
        seen_names.add(name_key)

        default_bag_size = _infer_throughput_bag_size(costing_product)
        is_bulka_default = _infer_throughput_bulka_default(costing_product)
        product = (by_item.get(item_id) if item_id else None) or by_name.get(name_key)

        if product is None:
            product = ThroughputProduct(
                tenant_id=TENANT_ID,
                item_id=item_id,
                name=name,
                default_bag_size=default_bag_size,
                is_bulka_default=is_bulka_default,
                active=costing_product.visible,
                is_stock_item=True,
                notes="Seeded from costing products",
            )
            db.add(product)
            created += 1
            if item_id:
                by_item[item_id] = product
            by_name[name_key] = product
            continue

        changed = False
        if item_id and product.item_id != item_id:
            # Re-key the item index. Leaving the old id in place would let a
            # later costing product carrying that id match this same row and
            # overwrite it again.
            if product.item_id and by_item.get(product.item_id) is product:
                del by_item[product.item_id]
            product.item_id = item_id
            by_item[item_id] = product
            changed = True
        if product.name != name:
            old_name_key = product.name.strip().lower() if product.name else None
            product.name = name
            # Drop the old name only if it still points at this row, so a
            # different row sharing that name stays reachable.
            if old_name_key and by_name.get(old_name_key) is product:
                del by_name[old_name_key]
            by_name[name_key] = product
            changed = True
        if product.default_bag_size != default_bag_size:
            product.default_bag_size = default_bag_size
            changed = True
        if product.is_bulka_default != is_bulka_default:
            product.is_bulka_default = is_bulka_default
            changed = True
        if product.active != costing_product.visible:
            product.active = costing_product.visible
            changed = True
        if product.is_stock_item is not True:
            product.is_stock_item = True
            changed = True
        # Notes are stamped only when empty or already the seed marker, so
        # other notes are kept. This is not counted as an update.
        if product.notes in {None, "", "Seeded from costing products"}:
            product.notes = "Seeded from costing products"
        if changed:
            updated += 1

    db.flush()
    return {"created": created, "updated": updated, "skipped": skipped}
def seed_client_access(db):
existing = db.scalar(select(ClientAccount.id))
if existing is not None:
@@ -667,7 +906,7 @@ def seed_client_access(db):
)
enabled_feature_map = {
TENANT_ID: {"dashboard", "raw_materials", "mix_master", "mix_calculator", "products", "scenarios", "powerbi_export", "client_access"},
TENANT_ID: {"dashboard", "raw_materials", "mix_master", "mix_calculator", "products", "scenarios", "powerbi_export", "client_access", "operations_throughput"},
"loft-grains": {"dashboard", "mix_calculator", "products", "powerbi_export"},
}
@@ -713,10 +952,13 @@ def seed_client_access(db):
def seed_costing_workspace(db):
workbook = _load_workbook()
raw_material_rows = _read_raw_material_rows(workbook)
mix_rows = _read_mix_rows(workbook)
product_rows = _read_product_rows(workbook)
costing_workbook = _load_workbook("C- Raw Products Costs", "M - All", "Product Cost - Price")
formula_workbook = _load_workbook("mix_quantites_per_client_per_pr")
raw_material_rows = _read_raw_material_rows(costing_workbook)
mix_rows = _read_mix_rows(costing_workbook)
product_rows = _read_product_rows(costing_workbook)
product_ingredient_rows = _read_product_ingredient_rows(formula_workbook)
raw_material_map = _upsert_raw_materials(db, raw_material_rows)
_upsert_process_rules(db, product_rows)
@@ -735,9 +977,53 @@ def seed_costing_workspace(db):
mix_cache[(mix_row["client_name"], mix_row["name"])] = mix
_upsert_products(db, product_rows, mix_cache, raw_material_map)
_upsert_product_ingredients(
db,
product_rows=product_rows,
product_ingredient_rows=product_ingredient_rows,
raw_material_map=raw_material_map,
)
def seed_if_empty():
def seed_throughput_workbook(db):
"""Import the Operations Throughput workbook on first run if tables are empty.

Probes for any ThroughputProduct row and any ProductionThroughput row. When
neither exists, the workbook path is resolved and, if one is found, imported
for TENANT_ID; an import failure is logged with its traceback and not
re-raised. Throughput products are also synced from the costing products and
the resulting counts are logged when any of them is non-zero.

NOTE(review): indentation is not visible in this view, so it cannot be told
whether the costing sync runs on every call or only inside the empty-tables
branch -- confirm against the real file before relying on either reading.
"""
# Existence probes: a non-None scalar means the table has at least one row.
has_products = db.scalar(select(ThroughputProduct.id)) is not None
has_entries = db.scalar(select(ProductionThroughput.id)) is not None
if not has_products and not has_entries:
workbook_path = resolve_throughput_workbook_path()
if workbook_path is None:
logger.info("Operations Throughput workbook not found; seeding throughput products from costing products")
else:
try:
report = import_throughput_workbook(db, workbook_path, TENANT_ID)
except Exception:
# Best effort: the failure is logged and seeding carries on.
logger.exception("Failed to seed Operations Throughput workbook from %s", workbook_path)
else:
logger.info("Operations Throughput seeded from %s: %s", workbook_path, report)
# `report` is rebound here to the created/updated/skipped counts of the sync.
report = seed_throughput_products_from_costing(db)
if any(report.values()):
logger.info("Throughput products synced from costing products: %s", report)
def seed_throughput_products(db):
    """Sync throughput products from costing products without importing historical entries.

    The created/updated/skipped counts are logged only when at least one of
    them is non-zero. Returns ``None``.
    """
    sync_counts = seed_throughput_products_from_costing(db)
    if not any(sync_counts.values()):
        return
    logger.info("Throughput products synced from costing products: %s", sync_counts)
def seed_startup_basics():
    """Create the tables, then run the startup seeds in one session and commit.

    The seeds run in this order: ``seed_client_access``, ``seed_access``,
    ``seed_throughput_workbook``.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        # Order is preserved; each step receives the same open session.
        for seed_step in (seed_client_access, seed_access, seed_throughput_workbook):
            seed_step(session)
        session.commit()
def seed_all():
Base.metadata.create_all(bind=engine)
with SessionLocal() as db:
workbook_path = _resolve_workbook_path()
@@ -748,10 +1034,29 @@ def seed_if_empty():
"Skipping costing workspace seed because workbook is missing. Checked: %s",
", ".join(str(path) for path in _workbook_candidates()),
)
seed_throughput_products(db)
seed_client_access(db)
seed_access(db)
db.commit()
def seed_if_empty():
    """Create the tables and seed the costing workspace only when it is empty.

    The costing workspace is seeded when no ``RawMaterial`` row exists and the
    resolved workbook path exists; a missing workbook is logged as a warning
    listing every candidate path. Throughput products, client access and
    access are then seeded, and the session is committed.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        costing_is_empty = session.scalar(select(RawMaterial.id)) is None
        if costing_is_empty:
            if _resolve_workbook_path().exists():
                seed_costing_workspace(session)
            else:
                checked_paths = ", ".join(str(path) for path in _workbook_candidates())
                logger.warning(
                    "Skipping costing workspace seed because workbook is missing. Checked: %s",
                    checked_paths,
                )
        # These steps run whether or not the costing workspace was seeded.
        for seed_step in (seed_throughput_products, seed_client_access, seed_access):
            seed_step(session)
        session.commit()
if __name__ == "__main__":
seed_if_empty()
seed_all()