Files
data-entry-app/backend/app/seed.py
T

1112 lines
40 KiB
Python
Raw Normal View History

from __future__ import annotations
from collections import Counter
2026-04-25 22:51:36 +12:00
from datetime import date, datetime
import logging
import os
from pathlib import Path
import re
2026-04-25 20:43:37 +12:00
from openpyxl import load_workbook
2026-06-03 15:09:21 +12:00
from sqlalchemy import func, select
2026-05-31 20:19:44 +12:00
from sqlalchemy.orm import selectinload
2026-04-25 20:43:37 +12:00
from app.db.session import Base, SessionLocal, engine
from app.models.assumption import FreightCostRule, PackagingCostRule, ProcessCostRule
from app.models.client_access import ClientAccessAuditEvent, ClientAccount, ClientFeatureAccess, ClientUser, ClientUserModulePermission
2026-04-25 20:43:37 +12:00
from app.models.mix import Mix, MixIngredient
2026-05-31 20:19:44 +12:00
from app.models.product import Product, ProductIngredient
2026-04-25 20:43:37 +12:00
from app.models.raw_material import RawMaterial, RawMaterialPriceVersion
2026-05-31 20:19:44 +12:00
from app.models.throughput import ProductionThroughput, ThroughputProduct
from app.seed_access import seed_access
from app.services.client_access_service import MODULE_CATALOG, default_access_level_for_role
2026-05-31 20:19:44 +12:00
from app.services.throughput_service import import_workbook as import_throughput_workbook
from app.services.throughput_service import resolve_workbook_path as resolve_throughput_workbook_path
2026-04-25 22:51:36 +12:00
# Tenant id used to filter and stamp the costing/throughput rows handled by this module.
TENANT_ID = "hunter-premium-produce"
# Effective date written onto raw-material price versions seeded from the workbook.
WORKBOOK_EFFECTIVE_DATE = date(2025, 9, 1)
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
WORKBOOK_SENTINEL_ITEM_ID = "404266"
2026-05-31 20:19:44 +12:00
# Current workbook file name; its candidate locations are tried before the legacy ones.
WORKBOOK_FILENAME = "1.xlsx"
# Older workbook file name, still searched as a fallback.
LEGACY_WORKBOOK_FILENAME = "Input Cost Spreadsheet(1).xlsx"
# Logger shared by every seeding routine in this module.
logger = logging.getLogger("data_entry_app.seed")
2026-05-10 09:46:07 +12:00
# Client names whose products are imported with visible=False.
# Both spellings of "Uncategorized"/"Uncategorised" are listed.
HIDDEN_PRODUCT_CLIENTS = frozenset(
    {
        "Bird Grits",
        "Chaff",
        "Hay & Straw",
        "Hunter Premium Produce",
        "Straight Grain",
        "Uncategorized",
        "Uncategorised",
    }
)
def _workbook_candidates() -> list[Path]:
    """Return every location where the costing workbook may live, in priority order.

    The optional ``WORKBOOK_PATH`` environment override comes first, followed
    by the current file name in each known directory, then the legacy file
    name. Entries that render to the same path string are listed only once.
    """
    override = (os.getenv("WORKBOOK_PATH") or "").strip()
    repo_root = Path(__file__).resolve().parents[2]
    working_dir = Path.cwd()
    shared_dir = Path("/srv/lean101-clients")
    app_dir = Path("/app")
    fs_root = Path("/")

    locations: list[Path] = []
    if override:
        locations.append(Path(override))
    locations += [
        repo_root / "input_data" / WORKBOOK_FILENAME,
        working_dir / "input_data" / WORKBOOK_FILENAME,
        shared_dir / WORKBOOK_FILENAME,
        repo_root / WORKBOOK_FILENAME,
        working_dir / WORKBOOK_FILENAME,
        app_dir / WORKBOOK_FILENAME,
        fs_root / WORKBOOK_FILENAME,
        repo_root / LEGACY_WORKBOOK_FILENAME,
        working_dir / LEGACY_WORKBOOK_FILENAME,
        shared_dir / LEGACY_WORKBOOK_FILENAME,
        app_dir / LEGACY_WORKBOOK_FILENAME,
        fs_root / LEGACY_WORKBOOK_FILENAME,
    ]

    # Dicts keep insertion order, so the first occurrence of each path wins.
    unique: dict[str, Path] = {}
    for location in locations:
        unique.setdefault(str(location), location)
    return list(unique.values())
def _resolve_workbook_path() -> Path:
    """Return the first candidate path that exists, else the top-priority candidate.

    The returned path is not guaranteed to exist; callers check ``.exists()``.
    """
    candidates = _workbook_candidates()
    return next((path for path in candidates if path.exists()), candidates[0])
def _text(value) -> str | None:
if value is None:
return None
if isinstance(value, str):
normalized = value.strip()
if not normalized:
return None
if normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
return None
return normalized
return str(value).strip() or None
def _number(value) -> float | None:
if value is None:
return None
if isinstance(value, bool):
return float(value)
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str):
normalized = value.strip().replace(",", "")
if not normalized or normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
return None
try:
return float(normalized)
except ValueError:
return None
return None
def _format_quantity(value: float | int | None) -> str:
if value is None:
return "0"
numeric = float(value)
if abs(numeric - round(numeric)) < 1e-9:
return str(int(round(numeric)))
return f"{numeric:.4f}".rstrip("0").rstrip(".")
def _slug(value: str | None, *, fallback: str) -> str:
    """Build a lowercase ``a-z0-9_`` identifier from ``value``.

    ``fallback`` is used both when ``value`` has no usable text and when the
    slug would otherwise be empty.
    """
    source = _text(value) or fallback
    collapsed = re.sub(r"[^a-z0-9]+", "_", source.lower())
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else fallback
def _normalize_sale_type(value) -> str:
    """Map a sale-type cell to its key; a missing value means ``"standard"``.

    The label is lowercased and every run of non ``a-z0-9`` characters becomes
    a single underscore (so ``"Per Unit"`` yields ``"per_unit"``).
    """
    text = _text(value)
    label = "standard" if text is None else text.lower()
    return "per_unit" if label == "per unit" else re.sub(r"[^a-z0-9]+", "_", label)
def _sheet_own_bag_to_model(value) -> bool:
    """Return ``True`` only when the sheet cell reads "no bag" (any case)."""
    text = _text(value)
    return text is not None and text.lower() == "no bag"
def _normalize_raw_material_unit(unit_label, kg_per_unit: float | None) -> str:
    """Translate the sheet's unit text into a canonical unit-of-measure label.

    Rules are checked in order: tonne spellings, plain ``kg``, 20 kg bags, any
    other kg-based label (named after ``kg_per_unit``), then a 1000 kg unit
    treated as a tonne. Otherwise the original text is returned, or ``"kg"``
    when there is none.
    """
    original_text = _text(unit_label)
    label = (original_text or "").lower()
    if label in {"per ton", "per tonne", "ton", "tonne"}:
        return "tonne"
    if label == "kg":
        return "kg"
    if label == "per bag 20kg" or "20 kg" in label:
        return "20kg bag"
    if kg_per_unit and "kg" in label:
        return f"{_format_quantity(kg_per_unit)}kg bag"
    if kg_per_unit == 1000:
        return "tonne"
    return original_text or "kg"
def _build_base_unit_label(sale_type: str, std_unit: float, own_bag: bool) -> str:
    """Compose the unit label for a product from its sale type and unit size."""
    quantity = _format_quantity(std_unit)
    if sale_type == "standard":
        suffix = "kg no bag" if own_bag else "kg bag"
    elif sale_type == "bulka":
        suffix = "kg bulka"
    elif sale_type == "per_unit":
        suffix = " unit"
    else:
        suffix = "kg"
    return f"{quantity}{suffix}"
def _derive_margin(finished_cost: float, sell_price) -> float | None:
    """Derive the margin ``1 - cost / price`` implied by a sell price.

    Returns ``None`` when the price is missing or non-positive, the cost is
    non-positive, the price does not exceed the cost, or the resulting margin
    falls outside the open interval (0, 1). The margin is rounded to six
    decimals.
    """
    price = _number(sell_price)
    if price is None:
        return None
    if price <= 0 or finished_cost <= 0:
        return None
    if price <= finished_cost:
        return None
    share = 1 - (finished_cost / price)
    if share <= 0 or share >= 1:
        return None
    return round(share, 6)
def _build_process_key(label, grading_cost: float, bagging_cost: float, cracking_cost: float) -> str | None:
    """Build a process-rule key from the label and its three costs.

    Returns ``None`` when all three costs are (numerically) zero. Otherwise
    the key is the slugged label followed by each cost scaled by 1000 and
    rounded to an integer.
    """
    costs = (grading_cost, bagging_cost, cracking_cost)
    if all(abs(cost) < 1e-9 for cost in costs):
        return None
    prefix = _slug(label, fallback="custom_process")
    grading, bagging, cracking = (int(round(cost * 1000)) for cost in costs)
    return f"{prefix}_g{grading}_b{bagging}_c{cracking}"
2026-05-31 20:19:44 +12:00
def _load_workbook(*required_sheets: str):
    """Open the first existing candidate workbook that has every required sheet.

    Args:
        *required_sheets: Sheet names that must all be present. With none
            given, the first existing candidate is returned.

    Returns:
        The workbook loaded with ``data_only=True`` (cached cell values
        instead of formulas).

    Raises:
        FileNotFoundError: No existing candidate satisfies the requirement.
            The message lists every path checked.
    """
    # Enumerate once so the search and the error message agree on the same list.
    candidates = _workbook_candidates()
    for candidate in candidates:
        if not candidate.exists():
            continue
        workbook = load_workbook(candidate, data_only=True)
        if all(sheet_name in workbook.sheetnames for sheet_name in required_sheets):
            return workbook

    # Getting here means nothing usable was found. With no required sheets the
    # loop returns the first existing file, so no candidate exists at all.
    checked = ", ".join(str(path) for path in candidates)
    if required_sheets:
        raise FileNotFoundError(
            "No workbook with required sheets found. "
            f"Required sheets: {', '.join(required_sheets)}. "
            f"Checked: {checked}"
        )
    raise FileNotFoundError(f"Workbook not found. Checked: {checked}")
def _read_raw_material_rows(workbook) -> list[dict]:
    """Parse the "C- Raw Products Costs" sheet into raw-material dicts.

    Data starts at sheet row 3. Rows without a name, or with neither a market
    value nor a cost per kg, are skipped. A missing or non-positive kg-per-unit
    defaults to 1.0, and a missing market value is derived as
    ``cost_per_kg * kg_per_unit``.
    """
    sheet = workbook["C- Raw Products Costs"]
    materials: list[dict] = []
    for cells in sheet.iter_rows(min_row=3, values_only=True):
        name = _text(cells[0])
        if not name:
            continue
        market_value = _number(cells[1])
        kg_per_unit = _number(cells[3])
        waste_percentage = _number(cells[4]) or 0.0
        cost_per_kg = _number(cells[7])
        if market_value is None and cost_per_kg is None:
            continue
        if kg_per_unit is None or kg_per_unit <= 0:
            kg_per_unit = 1.0
        if market_value is None:
            # cost_per_kg is known to be present here (checked above).
            market_value = round(cost_per_kg * kg_per_unit, 4)
        materials.append(
            {
                "name": name,
                "unit_of_measure": _normalize_raw_material_unit(cells[2], kg_per_unit),
                "kg_per_unit": kg_per_unit,
                "market_value": round(market_value, 4),
                "waste_percentage": waste_percentage,
            }
        )
    return materials
def _read_mix_rows(workbook) -> dict[tuple[str, str], dict]:
    """Parse the "M - All" sheet into one recipe per (client, mix name).

    Row 1 holds the ingredient names from the fourth column onwards; each
    later row holds client, mix name, total kg and one quantity per ingredient
    column. When the same (client, mix) appears more than once, the row with
    the highest score wins. The score is (ingredient count, whether a total
    was given, total kg).

    Returns:
        Mapping of ``(client_name, mix_name)`` to a dict with ``client_name``,
        ``name``, ``ingredients``, ``total_kg`` and ``score``.
    """
    worksheet = workbook["M - All"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True), ())
    # Remember each ingredient's real column index. Dropping blank header cells
    # before pairing names with quantities would shift every later quantity
    # onto the wrong ingredient.
    ingredient_columns = [
        (column_index, ingredient_name)
        for column_index, ingredient_name in enumerate(map(_text, header_row[3:]), start=3)
        if ingredient_name
    ]
    best_rows: dict[tuple[str, str], dict] = {}
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        mix_name = _text(row[1])
        if not client_name or not mix_name:
            continue
        ingredients = []
        for column_index, ingredient_name in ingredient_columns:
            if column_index >= len(row):
                # Row is shorter than the header; nothing further to read.
                break
            numeric_quantity = _number(row[column_index])
            if numeric_quantity and numeric_quantity > 0:
                ingredients.append({"raw_material_name": ingredient_name, "quantity_kg": numeric_quantity})
        if not ingredients:
            continue
        declared_total = _number(row[2])
        total_kg = declared_total or round(sum(item["quantity_kg"] for item in ingredients), 4)
        score = (len(ingredients), 1 if declared_total is not None else 0, total_kg)
        key = (client_name, mix_name)
        current = best_rows.get(key)
        if current is None or score > current["score"]:
            best_rows[key] = {
                "client_name": client_name,
                "name": mix_name,
                "ingredients": ingredients,
                "total_kg": total_kg,
                "score": score,
            }
    return best_rows
2026-05-31 20:19:44 +12:00
def _read_product_ingredient_rows(workbook) -> dict[tuple[str, str], dict]:
    """Parse the "mix_quantites_per_client_per_pr" sheet into product formulas.

    Row 1 holds the ingredient names from the fourth column onwards; each
    later row holds client, product name, total kg and one quantity per
    ingredient column. A later row for the same (client, product) replaces an
    earlier one.

    Returns:
        Mapping of ``(client_name, product_name)`` to a dict with
        ``client_name``, ``product_name``, ``total_kg`` and ``ingredients``.
        Each ingredient carries ``raw_material_name``, ``quantity_kg`` and
        ``sort_order``, the 1-based position of its column among the named
        ingredient columns.
    """
    worksheet = workbook["mix_quantites_per_client_per_pr"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True), ())
    # Remember each ingredient's real column index. Dropping blank header cells
    # before pairing names with quantities would shift every later quantity
    # onto the wrong ingredient.
    ingredient_columns = [
        (column_index, ingredient_name)
        for column_index, ingredient_name in enumerate(map(_text, header_row[3:]), start=3)
        if ingredient_name
    ]
    rows: dict[tuple[str, str], dict] = {}
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        product_name = _text(row[1])
        if not client_name or not product_name:
            continue
        ingredients = []
        for sort_order, (column_index, ingredient_name) in enumerate(ingredient_columns, start=1):
            if column_index >= len(row):
                # Row is shorter than the header; nothing further to read.
                break
            numeric_quantity = _number(row[column_index])
            if numeric_quantity and numeric_quantity > 0:
                ingredients.append(
                    {
                        "raw_material_name": ingredient_name,
                        "quantity_kg": numeric_quantity,
                        "sort_order": sort_order,
                    }
                )
        if not ingredients:
            continue
        total_kg = _number(row[2]) or round(sum(item["quantity_kg"] for item in ingredients), 4)
        rows[(client_name, product_name)] = {
            "client_name": client_name,
            "product_name": product_name,
            "total_kg": total_kg,
            "ingredients": ingredients,
        }
    return rows
def _read_product_rows(workbook) -> list[dict]:
    """Parse the "Product Cost - Price" sheet into product dicts.

    Data starts at sheet row 5; rows missing an item id, name or mix name are
    skipped. After all rows are read, each product gets a ``unit_of_measure``
    label and a ``bagging_process`` key. When products sharing the same
    (sale type, own bag, unit size) have differing (bag cost, freight cost)
    pairs, those not on the most common pair get a disambiguating suffix.
    """
    sheet = workbook["Product Cost - Price"]
    parsed: list[dict] = []
    variant_tally: dict[tuple[str, bool, float], Counter[tuple[float, float]]] = {}

    for cells in sheet.iter_rows(min_row=5, values_only=True):
        item_id, name, mix_name = _text(cells[1]), _text(cells[2]), _text(cells[3])
        if not (item_id and name and mix_name):
            continue
        client_name = _text(cells[0]) or "General"
        sale_type = _normalize_sale_type(cells[4])
        own_bag = _sheet_own_bag_to_model(cells[5])
        std_unit = _number(cells[6]) or 1.0
        bag_cost = round(_number(cells[15]) or 0.0, 4)
        freight_cost = round(_number(cells[16]) or 0.0, 4)
        delivered_cost = round(_number(cells[17]) or 0.0, 4)

        tally = variant_tally.setdefault((sale_type, own_bag, std_unit), Counter())
        tally[(bag_cost, freight_cost)] += 1

        parsed.append(
            {
                "client_name": client_name,
                "item_id": item_id,
                "name": name,
                "mix_name": mix_name,
                "sale_type": sale_type,
                "own_bag": own_bag,
                "std_unit": std_unit,
                "items_per_pallet": int(round(_number(cells[7]) or 1)),
                "grading_cost": round(_number(cells[12]) or 0.0, 4),
                "bagging_cost": round(_number(cells[13]) or 0.0, 4),
                "cracking_cost": round(_number(cells[14]) or 0.0, 4),
                "bag_cost": bag_cost,
                "freight_cost": freight_cost,
                "finished_product_delivered": delivered_cost,
                "distributor_margin": _derive_margin(delivered_cost, cells[19]),
                "wholesale_margin": _derive_margin(delivered_cost, cells[20]),
                "process_label": _text(cells[8]),
                "sheet_own_bag": _text(cells[5]),
                "visible": client_name not in HIDDEN_PRODUCT_CLIENTS,
            }
        )

    for entry in parsed:
        label = _build_base_unit_label(entry["sale_type"], entry["std_unit"], entry["own_bag"])
        tally = variant_tally[(entry["sale_type"], entry["own_bag"], entry["std_unit"])]
        own_costs = (entry["bag_cost"], entry["freight_cost"])
        if len(tally) > 1 and own_costs != tally.most_common(1)[0][0]:
            # Off-majority cost variant: tag the label so it stays distinct.
            if entry["sheet_own_bag"] == "Yes":
                tag = "Own Bag"
            elif entry["client_name"] == "Uncategorized":
                tag = "Bulk"
            else:
                tag = entry["client_name"]
            label = f"{label} ({tag})"
        entry["unit_of_measure"] = label
        entry["bagging_process"] = _build_process_key(
            entry["process_label"],
            entry["grading_cost"],
            entry["bagging_cost"],
            entry["cracking_cost"],
        )

    return parsed
def _upsert_raw_materials(db, rows: list[dict]) -> dict[str, RawMaterial]:
    """Create or refresh the tenant's raw materials and their active price.

    Materials are matched by name. A row with a positive market value creates
    the active price version or overwrites the existing one. Without a
    positive value, an existing active price is switched to inactive only when
    its own stored market value is non-positive.

    Returns:
        Mapping of material name to ``RawMaterial`` for every material of the
        tenant, including ones created here.
    """
    seed_note = "Seeded from Input Cost Spreadsheet(1).xlsx"
    tenant_materials = db.scalars(select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)).all()
    materials_by_name = {material.name: material for material in tenant_materials}

    for entry in rows:
        material = materials_by_name.get(entry["name"])
        if material is not None:
            material.unit_of_measure = entry["unit_of_measure"]
            material.kg_per_unit = entry["kg_per_unit"]
            material.status = "active"
            material.notes = seed_note
        else:
            material = RawMaterial(
                tenant_id=TENANT_ID,
                name=entry["name"],
                supplier="Workbook Import",
                unit_of_measure=entry["unit_of_measure"],
                kg_per_unit=entry["kg_per_unit"],
                status="active",
                notes=seed_note,
            )
            db.add(material)
            db.flush()
            materials_by_name[entry["name"]] = material

        current_price = next(
            (version for version in material.price_versions if version.status == "active"),
            None,
        )
        new_value = entry["market_value"]
        if new_value is None or not new_value > 0:
            if current_price is not None and current_price.market_value <= 0:
                current_price.status = "inactive"
                current_price.notes = "Disabled during workbook import because market value was non-positive"
            continue

        if current_price is None:
            material.price_versions.append(
                RawMaterialPriceVersion(
                    tenant_id=TENANT_ID,
                    market_value=new_value,
                    waste_percentage=entry["waste_percentage"],
                    effective_date=WORKBOOK_EFFECTIVE_DATE,
                    status="active",
                    notes=seed_note,
                )
            )
            continue

        current_price.market_value = new_value
        current_price.waste_percentage = entry["waste_percentage"]
        current_price.effective_date = WORKBOOK_EFFECTIVE_DATE
        current_price.status = "active"
        current_price.notes = seed_note

    db.flush()
    return materials_by_name
def _upsert_process_rules(db, products: list[dict]) -> None:
    """Create or update one process cost rule per distinct ``bagging_process``.

    Products without a process key are ignored. When several products share a
    key, the costs of the last one processed are the ones kept.
    """
    tenant_rules = db.scalars(select(ProcessCostRule).where(ProcessCostRule.tenant_id == TENANT_ID)).all()
    rules_by_name = {rule.process_name: rule for rule in tenant_rules}

    for entry in products:
        process_name = entry["bagging_process"]
        if not process_name:
            continue
        costs = {
            "grading_cost": entry["grading_cost"],
            "bagging_cost": entry["bagging_cost"],
            "cracking_cost": entry["cracking_cost"],
        }
        rule = rules_by_name.get(process_name)
        if rule is not None:
            for attribute, amount in costs.items():
                setattr(rule, attribute, amount)
            continue
        rule = ProcessCostRule(tenant_id=TENANT_ID, process_name=process_name, **costs)
        db.add(rule)
        rules_by_name[process_name] = rule
def _upsert_packaging_and_freight_rules(db, products: list[dict]) -> None:
    """Create or update the packaging and freight cost rules the products imply.

    Packaging rules are keyed by (sale type, unit of measure, own bag) and
    freight rules by (sale type, unit of measure). When several products map
    to the same key, the last one processed sets the cost.
    """
    tenant_packaging = db.scalars(
        select(PackagingCostRule).where(PackagingCostRule.tenant_id == TENANT_ID)
    ).all()
    packaging_by_key = {
        (rule.sale_type, rule.unit_of_measure, rule.own_bag): rule for rule in tenant_packaging
    }
    tenant_freight = db.scalars(
        select(FreightCostRule).where(FreightCostRule.tenant_id == TENANT_ID)
    ).all()
    freight_by_key = {(rule.sale_type, rule.unit_of_measure): rule for rule in tenant_freight}

    for entry in products:
        sale_type = entry["sale_type"]
        unit = entry["unit_of_measure"]
        own_bag = entry["own_bag"]

        packaging_key = (sale_type, unit, own_bag)
        packaging = packaging_by_key.get(packaging_key)
        if packaging is not None:
            packaging.bag_cost = entry["bag_cost"]
        else:
            packaging = PackagingCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit,
                own_bag=own_bag,
                bag_cost=entry["bag_cost"],
            )
            db.add(packaging)
            packaging_by_key[packaging_key] = packaging

        freight_key = (sale_type, unit)
        freight = freight_by_key.get(freight_key)
        if freight is not None:
            freight.cost_per_unit = entry["freight_cost"]
        else:
            freight = FreightCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit,
                cost_per_unit=entry["freight_cost"],
            )
            db.add(freight)
            freight_by_key[freight_key] = freight
def _upsert_mix(
    db,
    *,
    client_name: str,
    mix_name: str,
    ingredients: list[dict],
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Fetch or create a mix and make its ingredient rows match ``ingredients``.

    The mix is looked up in ``mix_cache`` first, then in the database, and is
    created only if both miss; the cache is updated either way. Ingredients
    whose raw material is not in ``raw_material_map`` are ignored. Existing
    ingredient rows not in the desired set are deleted.
    """
    cache_key = (client_name, mix_name)
    mix = mix_cache.get(cache_key)
    if mix is None:
        mix = db.scalar(
            select(Mix).where(
                Mix.tenant_id == TENANT_ID,
                Mix.client_name == client_name,
                Mix.name == mix_name,
            )
        )
        if mix is None:
            mix = Mix(
                tenant_id=TENANT_ID,
                client_name=client_name,
                name=mix_name,
                status="active",
                version=1,
                notes="Seeded from Input Cost Spreadsheet(1).xlsx",
            )
            db.add(mix)
            db.flush()
        mix_cache[cache_key] = mix

    stored_rows = db.scalars(select(MixIngredient).where(MixIngredient.mix_id == mix.id)).all()
    current = {stored.raw_material_id: stored for stored in stored_rows}

    wanted_ids = set()
    for spec in ingredients:
        material = raw_material_map.get(spec["raw_material_name"])
        if material is None:
            continue
        wanted_ids.add(material.id)
        stored = current.get(material.id)
        if stored is not None:
            stored.quantity_kg = spec["quantity_kg"]
            continue
        db.add(
            MixIngredient(
                tenant_id=TENANT_ID,
                mix_id=mix.id,
                raw_material_id=material.id,
                quantity_kg=spec["quantity_kg"],
            )
        )

    # Remove ingredients that are no longer part of the recipe.
    for material_id, stale in current.items():
        if material_id not in wanted_ids:
            db.delete(stale)
    return mix
def _ensure_single_material_mix(
    db,
    *,
    client_name: str,
    raw_material_name: str,
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Upsert a one-ingredient mix named after a raw material.

    The single ingredient's quantity is the material's ``kg_per_unit``, or 1.0
    when that is unset or zero. Raises ``KeyError`` if the material name is
    not in ``raw_material_map``.
    """
    material = raw_material_map[raw_material_name]
    sole_ingredient = {
        "raw_material_name": raw_material_name,
        "quantity_kg": material.kg_per_unit or 1.0,
    }
    return _upsert_mix(
        db,
        client_name=client_name,
        mix_name=raw_material_name,
        ingredients=[sole_ingredient],
        raw_material_map=raw_material_map,
        mix_cache=mix_cache,
    )
def _upsert_products(db, products: list[dict], mix_lookup: dict[tuple[str, str], Mix], raw_material_map: dict[str, RawMaterial]) -> None:
    """Create or update tenant products, matched by ``item_id``.

    Each product's mix is resolved in this order: exact (client, mix name)
    match; the only known mix with that name; a single-material mix created on
    the fly when the mix name is a raw material. Products whose mix cannot be
    resolved are skipped.
    """
    mix_cache = dict(mix_lookup)
    mixes_by_name: dict[str, list[Mix]] = {}
    for known_mix in mix_cache.values():
        mixes_by_name.setdefault(known_mix.name, []).append(known_mix)

    tenant_products = db.scalars(select(Product).where(Product.tenant_id == TENANT_ID)).all()
    products_by_item = {stored.item_id: stored for stored in tenant_products if stored.item_id}

    for entry in products:
        mix_name = entry["mix_name"]
        mix = mix_cache.get((entry["client_name"], mix_name))
        if mix is None:
            same_name = mixes_by_name.get(mix_name, [])
            if len(same_name) == 1:
                mix = same_name[0]
        if mix is None and mix_name in raw_material_map:
            mix = _ensure_single_material_mix(
                db,
                client_name=entry["client_name"],
                raw_material_name=mix_name,
                raw_material_map=raw_material_map,
                mix_cache=mix_cache,
            )
        if mix is None:
            continue

        # Fields written on both the create and the update path.
        synced_fields = {
            "client_name": entry["client_name"],
            "name": entry["name"],
            "mix_id": mix.id,
            "sale_type": entry["sale_type"],
            "own_bag": entry["own_bag"],
            "visible": entry["visible"],
            "unit_of_measure": entry["unit_of_measure"],
            "items_per_pallet": entry["items_per_pallet"],
            "bagging_process": entry["bagging_process"],
            "distributor_margin": entry["distributor_margin"],
            "wholesale_margin": entry["wholesale_margin"],
            "notes": "Seeded from Input Cost Spreadsheet(1).xlsx",
        }

        product = products_by_item.get(entry["item_id"])
        if product is not None:
            for attribute, value in synced_fields.items():
                setattr(product, attribute, value)
            continue
        product = Product(tenant_id=TENANT_ID, item_id=entry["item_id"], **synced_fields)
        db.add(product)
        products_by_item[entry["item_id"]] = product
2026-05-31 20:19:44 +12:00
def _upsert_product_ingredients(
    db,
    *,
    product_rows: list[dict],
    product_ingredient_rows: dict[tuple[str, str], dict],
    raw_material_map: dict[str, RawMaterial],
) -> None:
    """Sync per-product ingredient rows from the workbook formulas.

    A formula keyed by (client, name) applies to every tenant product of that
    client whose own name or whose mix name equals the formula name.
    Ingredients whose raw material is unknown are ignored, and a matched
    product's existing ingredient rows that the formula does not list are
    deleted.

    ``product_rows`` is accepted but not read by this function.
    """
    tenant_products = db.scalars(
        select(Product).where(Product.tenant_id == TENANT_ID).options(selectinload(Product.mix))
    ).all()

    products_by_formula_key: dict[tuple[str, str], list[Product]] = {}
    for product in tenant_products:
        lookup_keys = {(product.client_name, product.name)}
        if product.mix is not None:
            lookup_keys.add((product.client_name, product.mix.name))
        for lookup_key in lookup_keys:
            products_by_formula_key.setdefault(lookup_key, []).append(product)

    for formula_key, formula in product_ingredient_rows.items():
        for product in products_by_formula_key.get(formula_key, ()):
            stored_rows = db.scalars(
                select(ProductIngredient).where(ProductIngredient.product_id == product.id)
            ).all()
            current = {stored.raw_material_id: stored for stored in stored_rows}

            wanted_ids: set[int] = set()
            for spec in formula["ingredients"]:
                material = raw_material_map.get(spec["raw_material_name"])
                if material is None:
                    continue
                wanted_ids.add(material.id)
                stored = current.get(material.id)
                if stored is not None:
                    stored.quantity_kg = spec["quantity_kg"]
                    stored.sort_order = spec["sort_order"]
                    continue
                db.add(
                    ProductIngredient(
                        tenant_id=TENANT_ID,
                        product_id=product.id,
                        raw_material_id=material.id,
                        quantity_kg=spec["quantity_kg"],
                        sort_order=spec["sort_order"],
                    )
                )

            # Drop ingredient rows the formula no longer lists.
            for material_id, stale in current.items():
                if material_id not in wanted_ids:
                    db.delete(stale)
2026-06-03 15:09:21 +12:00
def seed_product_ingredients_from_workbook(db) -> dict[str, int]:
    """Backfill per-product formulas from the workbook.

    Returns a report with three counters: ``formulas`` (formulas read),
    ``products_with_formulas`` (distinct tenant products that have ingredient
    rows after the sync) and ``backfilled`` (that same count, but only when
    the tenant had no product ingredients beforehand; otherwise 0). A missing
    formula workbook is logged and reported as all zeros.
    """

    def report(formulas: int = 0, with_formulas: int = 0, backfilled: int = 0) -> dict[str, int]:
        return {
            "formulas": formulas,
            "products_with_formulas": with_formulas,
            "backfilled": backfilled,
        }

    try:
        formula_workbook = _load_workbook("mix_quantites_per_client_per_pr")
    except FileNotFoundError:
        logger.info("Skipping product ingredient backfill because formula workbook is missing")
        return report()

    formulas = _read_product_ingredient_rows(formula_workbook)
    if not formulas:
        return report()

    tenant_materials = db.scalars(select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)).all()
    raw_material_map = {material.name: material for material in tenant_materials}
    if not raw_material_map:
        return report(formulas=len(formulas))

    # Record whether anything existed before syncing, to tell a backfill from a refresh.
    first_ingredient_id = db.scalar(
        select(ProductIngredient.id).where(ProductIngredient.tenant_id == TENANT_ID).limit(1)
    )
    already_populated = first_ingredient_id is not None

    _upsert_product_ingredients(
        db,
        product_rows=[],
        product_ingredient_rows=formulas,
        raw_material_map=raw_material_map,
    )
    db.flush()

    covered = db.scalar(
        select(func.count(func.distinct(ProductIngredient.product_id))).where(ProductIngredient.tenant_id == TENANT_ID)
    )
    covered_count = int(covered or 0)
    return report(
        formulas=len(formulas),
        with_formulas=covered_count,
        backfilled=0 if already_populated else covered_count,
    )
2026-05-31 20:19:44 +12:00
def _infer_throughput_bag_size(product: Product) -> float | None:
if product.sale_type == "bulka":
return None
unit = (product.unit_of_measure or "").strip().lower()
match = re.search(r"(\d+(?:\.\d+)?)\s*kg", unit)
if match:
return float(match.group(1))
if unit == "kg":
return 1.0
if unit == "tonne":
return 1000.0
return None
def _infer_throughput_bulka_default(product: Product) -> bool:
unit = (product.unit_of_measure or "").lower()
return product.sale_type == "bulka" or "bulka" in product.name.lower() or "bulka" in unit
def seed_throughput_products_from_costing(db) -> dict[str, int]:
    """Mirror costing products into the throughput product dropdown.

    Each tenant costing product is matched to a throughput product by item id,
    falling back to a case-insensitive name match, and is created when neither
    matches. Matched products are brought in line with the costing product
    (item id, name, bag size, bulka flag, client, active and stock flags).

    Returns:
        Counts under the keys ``created``, ``updated`` and ``skipped``.
    """
    costing_products = db.scalars(
        select(Product)
        .where(Product.tenant_id == TENANT_ID)
        .order_by(Product.name, Product.id)
    ).all()
    if not costing_products:
        return {"created": 0, "updated": 0, "skipped": 0}
    throughput_products = db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == TENANT_ID)
    ).all()
    # Lookup tables over the existing throughput products; both are extended as
    # new products are created below.
    by_item = {
        throughput_product.item_id: throughput_product
        for throughput_product in throughput_products
        if throughput_product.item_id
    }
    by_name = {
        throughput_product.name.strip().lower(): throughput_product
        for throughput_product in throughput_products
        if throughput_product.name
    }
    created = 0
    updated = 0
    skipped = 0
    # Item ids / names already handled in this run, used to skip duplicates.
    seen_item_ids: set[str] = set()
    seen_names: set[str] = set()
    for costing_product in costing_products:
        name = (costing_product.name or "").strip()
        if not name:
            skipped += 1
            continue
        item_id = (costing_product.item_id or "").strip() or None
        name_key = name.lower()
        # Products with an item id are de-duplicated by id; products without
        # one are de-duplicated by name.
        if item_id and item_id in seen_item_ids:
            skipped += 1
            continue
        if not item_id and name_key in seen_names:
            skipped += 1
            continue
        if item_id:
            seen_item_ids.add(item_id)
        seen_names.add(name_key)
        default_bag_size = _infer_throughput_bag_size(costing_product)
        is_bulka_default = _infer_throughput_bulka_default(costing_product)
        client_name = (costing_product.client_name or "").strip() or None
        # NOTE(review): the name fallback can return a throughput product that
        # carries a different item_id; that item_id is then overwritten below.
        # Confirm this is intended when two costing SKUs share a name.
        product = (by_item.get(item_id) if item_id else None) or by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=TENANT_ID,
                item_id=item_id,
                name=name,
                client_name=client_name,
                default_bag_size=default_bag_size,
                is_bulka_default=is_bulka_default,
                # Every costing SKU should be selectable in the throughput picker
                # (the Client filter + search keep the long list manageable).
                active=True,
                is_stock_item=True,
                notes="Seeded from costing products",
            )
            db.add(product)
            created += 1
            if item_id:
                by_item[item_id] = product
            by_name[name_key] = product
            continue
        # Existing product: apply each difference and remember whether anything changed.
        changed = False
        if item_id and product.item_id != item_id:
            product.item_id = item_id
            changed = True
        if product.name != name:
            old_name_key = product.name.strip().lower() if product.name else None
            product.name = name
            # Keep the name lookup consistent with the rename.
            if old_name_key:
                by_name.pop(old_name_key, None)
            by_name[name_key] = product
            changed = True
        if product.default_bag_size != default_bag_size:
            product.default_bag_size = default_bag_size
            changed = True
        if product.is_bulka_default != is_bulka_default:
            product.is_bulka_default = is_bulka_default
            changed = True
        if product.client_name != client_name:
            product.client_name = client_name
            changed = True
        if product.active is not True:
            product.active = True
            changed = True
        if product.is_stock_item is not True:
            product.is_stock_item = True
            changed = True
        # Notes are only (re)stamped when empty or already the seed note; this
        # does not count as an update.
        if product.notes in {None, "", "Seeded from costing products"}:
            product.notes = "Seeded from costing products"
        if changed:
            updated += 1
    db.flush()
    return {"created": created, "updated": updated, "skipped": skipped}
2026-04-25 22:51:36 +12:00
def seed_client_access(db):
    """Seed two demo client accounts with users, feature flags and permissions.

    Does nothing when any ``ClientAccount`` row already exists. Otherwise it
    creates the "Hunter Premium Produce" and "Loft Grains" accounts, their
    users, one feature-access row per ``MODULE_CATALOG`` entry, one module
    permission per user and module, and a single audit event on the first
    account.
    """
    # Guard: only seed into a database with no client accounts at all.
    existing = db.scalar(select(ClientAccount.id))
    if existing is not None:
        return
    specialty = ClientAccount(
        tenant_id=TENANT_ID,
        name="Hunter Premium Produce",
        client_code="HPP",
        status="active",
        powerbi_workspace="hunter-premium-produce-prod",
        notes="Primary production client for the Lean 101 admin and access workflows",
    )
    loft = ClientAccount(
        tenant_id="loft-grains",
        name="Loft Grains",
        client_code="LOFT",
        status="onboarding",
        powerbi_workspace="farm-ops-sandbox",
        notes="Onboarding workspace used to test staged user enablement",
    )
    db.add_all([specialty, loft])
    # Flush before client.id / specialty.id are read further down.
    db.flush()
    specialty.users.extend(
        [
            ClientUser(
                tenant_id=specialty.tenant_id,
                full_name="Amelia Hart",
                email="operator@example.com",
                role="superadmin",
                status="active",
                is_new_user=False,
                last_login_at=datetime(2026, 4, 24, 11, 30),
            ),
            ClientUser(
                tenant_id=specialty.tenant_id,
                full_name="Ethan Cole",
                email="ethan.cole@hunterpremiumproduce.example",
                role="operator",
                status="invited",
                is_new_user=True,
            ),
        ]
    )
    loft.users.extend(
        [
            ClientUser(
                tenant_id=loft.tenant_id,
                full_name="Ruby Singh",
                email="ruby.singh@loftgrains.example",
                role="viewer",
                status="active",
                is_new_user=False,
                last_login_at=datetime(2026, 4, 22, 9, 10),
            )
        ]
    )
    # Feature keys switched on per tenant; every other catalog entry is stored disabled.
    enabled_feature_map = {
        TENANT_ID: {"dashboard", "raw_materials", "mix_master", "mix_calculator", "products", "scenarios", "powerbi_export", "client_access", "operations_throughput"},
        "loft-grains": {"dashboard", "mix_calculator", "products", "powerbi_export"},
    }
    for client in (specialty, loft):
        enabled_keys = enabled_feature_map[client.tenant_id]
        for feature_key, feature_name, feature_group, description in MODULE_CATALOG:
            client.features.append(
                ClientFeatureAccess(
                    tenant_id=client.tenant_id,
                    feature_key=feature_key,
                    feature_name=feature_name,
                    feature_group=feature_group,
                    description=description,
                    enabled=feature_key in enabled_keys,
                )
            )
        # One permission row per user and module, with the level taken from the user's role.
        for user in client.users:
            for module_key, _, _, _ in MODULE_CATALOG:
                user.module_permissions.append(
                    ClientUserModulePermission(
                        tenant_id=client.tenant_id,
                        client_account_id=client.id,
                        module_key=module_key,
                        access_level=default_access_level_for_role(user.role, module_key),
                    )
                )
    specialty.audit_events.append(
        ClientAccessAuditEvent(
            tenant_id=specialty.tenant_id,
            actor_type="seed",
            actor_name="Lean 101 Seeder",
            actor_email="system@lean101.local",
            actor_role="system",
            action="client_access.seeded",
            target_type="client_account",
            target_id=specialty.id,
            module_key="client_access",
            summary="Initial client access controls, module permissions, and feature flags were seeded.",
        )
    )
2026-04-25 22:51:36 +12:00
def seed_costing_workspace(db):
    """Import the costing workbook: materials, cost rules, mixes, products, formulas.

    Both workbooks are located and fully parsed before anything is written.
    Raises ``FileNotFoundError`` when no candidate workbook has the required
    sheets.
    """
    costing_book = _load_workbook("C- Raw Products Costs", "M - All", "Product Cost - Price")
    formula_book = _load_workbook("mix_quantites_per_client_per_pr")

    material_rows = _read_raw_material_rows(costing_book)
    recipes = _read_mix_rows(costing_book)
    product_rows = _read_product_rows(costing_book)
    formulas = _read_product_ingredient_rows(formula_book)

    raw_material_map = _upsert_raw_materials(db, material_rows)
    _upsert_process_rules(db, product_rows)
    _upsert_packaging_and_freight_rules(db, product_rows)

    mix_cache: dict[tuple[str, str], Mix] = {}
    for recipe in recipes.values():
        cache_key = (recipe["client_name"], recipe["name"])
        mix_cache[cache_key] = _upsert_mix(
            db,
            client_name=recipe["client_name"],
            mix_name=recipe["name"],
            ingredients=recipe["ingredients"],
            raw_material_map=raw_material_map,
            mix_cache=mix_cache,
        )

    _upsert_products(db, product_rows, mix_cache, raw_material_map)
    _upsert_product_ingredients(
        db,
        product_rows=product_rows,
        product_ingredient_rows=formulas,
        raw_material_map=raw_material_map,
    )
2026-04-25 22:51:36 +12:00
2026-05-31 20:19:44 +12:00
def seed_throughput_workbook(db):
    """Import the Operations Throughput workbook on first run if tables are empty."""
    has_products = db.scalar(select(ThroughputProduct.id)) is not None
    has_entries = db.scalar(select(ProductionThroughput.id)) is not None
    # The workbook import is attempted only when both throughput tables are empty.
    if not has_products and not has_entries:
        workbook_path = resolve_throughput_workbook_path()
        if workbook_path is None:
            logger.info("Operations Throughput workbook not found; seeding throughput products from costing products")
        else:
            # Best effort: an import failure is logged and does not stop seeding.
            try:
                report = import_throughput_workbook(db, workbook_path, TENANT_ID)
            except Exception:
                logger.exception("Failed to seed Operations Throughput workbook from %s", workbook_path)
            else:
                logger.info("Operations Throughput seeded from %s: %s", workbook_path, report)
    # Sync throughput products from the costing products; log only when something changed.
    report = seed_throughput_products_from_costing(db)
    if any(report.values()):
        logger.info("Throughput products synced from costing products: %s", report)
def seed_throughput_products(db):
    """Sync throughput products from costing products; historical entries are not imported.

    The sync report is logged only when at least one counter is non-zero.
    """
    sync_report = seed_throughput_products_from_costing(db)
    if not any(sync_report.values()):
        return
    logger.info("Throughput products synced from costing products: %s", sync_report)
def seed_startup_basics():
    """Create tables, then run the startup seeders in a single committed session.

    Runs client access, access, throughput and product-ingredient seeding in
    that order, then commits once.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        for seeder in (seed_client_access, seed_access, seed_throughput_workbook):
            seeder(session)
        backfill_report = seed_product_ingredients_from_workbook(session)
        if backfill_report["backfilled"]:
            logger.info("Product ingredients backfilled from workbook: %s", backfill_report)
        session.commit()
def seed_all():
    """Create tables and run every seeder, committing once at the end.

    The costing workspace is seeded only when a workbook file is found;
    otherwise a warning listing the checked paths is logged and the remaining
    seeders still run.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        if _resolve_workbook_path().exists():
            seed_costing_workspace(session)
        else:
            checked_paths = ", ".join(str(path) for path in _workbook_candidates())
            logger.warning(
                "Skipping costing workspace seed because workbook is missing. Checked: %s",
                checked_paths,
            )
        seed_throughput_products(session)
        seed_client_access(session)
        seed_access(session)
        session.commit()
def seed_if_empty():
    """Create tables and seed, importing the costing workbook only when no raw material exists.

    When the raw-material table is empty and no workbook file is found, a
    warning listing the checked paths is logged. The throughput, client access
    and access seeders then run, followed by a single commit.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        costing_is_empty = session.scalar(select(RawMaterial.id)) is None
        if costing_is_empty:
            if _resolve_workbook_path().exists():
                seed_costing_workspace(session)
            else:
                checked_paths = ", ".join(str(path) for path in _workbook_candidates())
                logger.warning(
                    "Skipping costing workspace seed because workbook is missing. Checked: %s",
                    checked_paths,
                )
        seed_throughput_products(session)
        seed_client_access(session)
        seed_access(session)
        session.commit()
# Running this module as a script performs the full seed.
if __name__ == "__main__":
    seed_all()