# Source file: data-entry-app/backend/app/seed.py
# (698 lines, 25 KiB, Python)

from __future__ import annotations
from collections import Counter
from datetime import date, datetime
import logging
from pathlib import Path
import re
from openpyxl import load_workbook
from sqlalchemy import select
from app.db.session import Base, SessionLocal, engine
from app.models.assumption import FreightCostRule, PackagingCostRule, ProcessCostRule
from app.models.client_access import ClientAccessAuditEvent, ClientAccount, ClientFeatureAccess, ClientUser, ClientUserModulePermission
from app.models.mix import Mix, MixIngredient
from app.models.product import Product
from app.models.raw_material import RawMaterial, RawMaterialPriceVersion
from app.seed_access import seed_access
from app.services.client_access_service import MODULE_CATALOG, default_access_level_for_role
# Tenant id stamped on every costing record (and the primary client account) written by this seeder.
TENANT_ID = "hunter-premium-produce"
# Effective date assigned to price versions imported from the workbook.
WORKBOOK_EFFECTIVE_DATE = date(2025, 9, 1)
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
WORKBOOK_SENTINEL_ITEM_ID = "404266"
# The workbook is expected two directory levels above this module's own directory.
WORKBOOK_PATH = Path(__file__).resolve().parents[2] / "Input Cost Spreadsheet(1).xlsx"
logger = logging.getLogger("data_entry_app.seed")
def _text(value) -> str | None:
if value is None:
return None
if isinstance(value, str):
normalized = value.strip()
if not normalized:
return None
if normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
return None
return normalized
return str(value).strip() or None
def _number(value) -> float | None:
if value is None:
return None
if isinstance(value, bool):
return float(value)
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str):
normalized = value.strip().replace(",", "")
if not normalized or normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
return None
try:
return float(normalized)
except ValueError:
return None
return None
def _format_quantity(value: float | int | None) -> str:
if value is None:
return "0"
numeric = float(value)
if abs(numeric - round(numeric)) < 1e-9:
return str(int(round(numeric)))
return f"{numeric:.4f}".rstrip("0").rstrip(".")
def _slug(value: str | None, *, fallback: str) -> str:
    """Build a lowercase ``a-z0-9``/underscore slug from ``value``.

    ``fallback`` is used both when ``value`` is blank and when the slug would
    otherwise come out empty.
    """
    source = _text(value) or fallback
    collapsed = re.sub(r"[^a-z0-9]+", "_", source.lower())
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else fallback
def _normalize_sale_type(value) -> str:
    """Map a sale-type cell to a lowercase identifier.

    Blank cells default to ``"standard"``; ``"per unit"`` maps to
    ``"per_unit"``; any other label has each run of non-alphanumerics replaced
    by a single underscore.
    """
    text = _text(value)
    label = text.lower() if text else "standard"
    return "per_unit" if label == "per unit" else re.sub(r"[^a-z0-9]+", "_", label)
def _sheet_own_bag_to_model(value) -> bool:
    """Return ``True`` only when the cell reads "no bag" (trimmed, case-insensitive)."""
    # NOTE(review): the flag is named "own_bag" but is driven by the "No Bag" label — confirm intent.
    text = _text(value)
    return text is not None and text.lower() == "no bag"
def _normalize_raw_material_unit(unit_label, kg_per_unit: float | None) -> str:
    """Translate the workbook's unit label into the unit-of-measure string stored on a raw material.

    Known tonne/kg/20 kg bag spellings are mapped first. Any other label that
    mentions "kg" becomes "<kg_per_unit>kg bag" when a weight is available; a
    1000 kg weight implies "tonne"; otherwise the trimmed label itself is
    returned, falling back to "kg" for blank labels.
    """
    raw_label = _text(unit_label)
    label = raw_label.lower() if raw_label else ""
    if label in {"per ton", "per tonne", "ton", "tonne"}:
        return "tonne"
    if label == "kg":
        return "kg"
    if label == "per bag 20kg" or "20 kg" in label:
        return "20kg bag"
    if kg_per_unit and "kg" in label:
        return f"{_format_quantity(kg_per_unit)}kg bag"
    if kg_per_unit == 1000:
        return "tonne"
    return raw_label or "kg"
def _build_base_unit_label(sale_type: str, std_unit: float, own_bag: bool) -> str:
    """Compose the base unit label: the formatted quantity plus a sale-type specific suffix."""
    quantity = _format_quantity(std_unit)
    if sale_type == "standard":
        suffix = "kg no bag" if own_bag else "kg bag"
    elif sale_type == "bulka":
        suffix = "kg bulka"
    elif sale_type == "per_unit":
        suffix = " unit"
    else:
        suffix = "kg"
    return f"{quantity}{suffix}"
def _derive_margin(finished_cost: float, sell_price) -> float | None:
    """Return the margin ``1 - cost / price`` rounded to six decimals.

    ``None`` is returned when the price cell is not numeric, when either
    figure is non-positive, or when the price does not exceed the cost.
    """
    price = _number(sell_price)
    if price is None:
        return None
    unusable = price <= 0 or finished_cost <= 0 or price <= finished_cost
    if unusable:
        return None
    margin = 1 - (finished_cost / price)
    # Defensive range check kept from the original guard.
    out_of_range = margin <= 0 or margin >= 1
    return None if out_of_range else round(margin, 6)
def _build_process_key(label, grading_cost: float, bagging_cost: float, cracking_cost: float) -> str | None:
    """Build a process-rule key from the process label and its three costs.

    Returns ``None`` when all three costs are effectively zero. Otherwise the
    key is the slugged label followed by each cost expressed in thousandths.
    """
    costs = (grading_cost, bagging_cost, cracking_cost)
    if all(abs(cost) < 1e-9 for cost in costs):
        return None
    prefix = _slug(label, fallback="custom_process")
    grading, bagging, cracking = (int(round(cost * 1000)) for cost in costs)
    return f"{prefix}_g{grading}_b{bagging}_c{cracking}"
def _load_workbook():
    """Open the costing workbook at ``WORKBOOK_PATH``.

    Raises:
        FileNotFoundError: if the workbook file does not exist.
    """
    if WORKBOOK_PATH.exists():
        # data_only=True is passed so openpyxl returns stored cell values rather than formulas.
        return load_workbook(WORKBOOK_PATH, data_only=True)
    raise FileNotFoundError(f"Workbook not found at {WORKBOOK_PATH}")
def _read_raw_material_rows(workbook) -> list[dict]:
    """Read raw-material rows from the "C- Raw Products Costs" sheet (from row 3 on).

    Rows without a name, or with neither a market value nor a cost per kg,
    are skipped. A missing or non-positive kg-per-unit defaults to 1.0, and a
    missing market value is derived as ``cost_per_kg * kg_per_unit``.
    """
    sheet = workbook["C- Raw Products Costs"]
    materials: list[dict] = []
    for cells in sheet.iter_rows(min_row=3, values_only=True):
        name = _text(cells[0])
        if not name:
            continue

        market_value = _number(cells[1])
        kg_per_unit = _number(cells[3])
        waste_percentage = _number(cells[4]) or 0.0
        cost_per_kg = _number(cells[7])
        if market_value is None and cost_per_kg is None:
            continue

        if kg_per_unit is None or kg_per_unit <= 0:
            kg_per_unit = 1.0
        if market_value is None:
            # cost_per_kg cannot be None here: rows lacking both figures were skipped above.
            market_value = round(cost_per_kg * kg_per_unit, 4)

        materials.append(
            {
                "name": name,
                "unit_of_measure": _normalize_raw_material_unit(cells[2], kg_per_unit),
                "kg_per_unit": kg_per_unit,
                "market_value": round(market_value, 4),
                "waste_percentage": waste_percentage,
            }
        )
    return materials
def _read_mix_rows(workbook) -> dict[tuple[str, str], dict]:
    """Read mix recipes from the "M - All" sheet, keeping the best row per (client, mix).

    The header row names one ingredient per column from the fourth column on.
    Each data row holds the client name, the mix name, an optional total and
    one quantity per ingredient column. Rows without a client, a mix name or
    any positive quantity are skipped.

    When several rows share the same (client, mix) key, the row with the
    highest score wins; the score is the tuple (ingredient count, whether a
    total was supplied, total kg).

    Returns:
        Mapping of ``(client_name, mix_name)`` to a dict with the keys
        ``client_name``, ``name``, ``ingredients``, ``total_kg`` and ``score``.
    """
    worksheet = workbook["M - All"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True))

    # Keep each ingredient's real column index. Dropping blank header cells and
    # then pairing names with quantities by position would shift every
    # ingredient after a blank column onto the wrong quantity.
    ingredient_columns: list[tuple[int, str]] = []
    for column_index, header_value in enumerate(header_row[3:], start=3):
        ingredient_name = _text(header_value)
        if ingredient_name:
            ingredient_columns.append((column_index, ingredient_name))

    best_rows: dict[tuple[str, str], dict] = {}
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        mix_name = _text(row[1])
        if not client_name or not mix_name:
            continue

        ingredients = []
        for column_index, ingredient_name in ingredient_columns:
            # Rows shorter than the header simply have no quantity for that column.
            quantity = _number(row[column_index]) if column_index < len(row) else None
            if quantity and quantity > 0:
                ingredients.append({"raw_material_name": ingredient_name, "quantity_kg": quantity})
        if not ingredients:
            continue

        declared_total = _number(row[2])
        total_kg = declared_total or round(sum(item["quantity_kg"] for item in ingredients), 4)
        score = (len(ingredients), 1 if declared_total is not None else 0, total_kg)
        key = (client_name, mix_name)
        current = best_rows.get(key)
        if current is None or score > current["score"]:
            best_rows[key] = {
                "client_name": client_name,
                "name": mix_name,
                "ingredients": ingredients,
                "total_kg": total_kg,
                "score": score,
            }
    return best_rows
def _read_product_rows(workbook) -> list[dict]:
    """Read product rows from the "Product Cost - Price" sheet (from row 5 on).

    Rows missing an item id, name or mix name are skipped. After all rows are
    parsed, each row gets a ``unit_of_measure`` label and a ``bagging_process``
    key. When rows sharing the same (sale type, own bag, standard unit) carry
    different (bag cost, freight cost) pairs, rows that differ from the most
    common pair get a parenthesised qualifier appended to their unit label.
    """
    sheet = workbook["Product Cost - Price"]
    parsed: list[dict] = []
    cost_variants: dict[tuple[str, bool, float], Counter[tuple[float, float]]] = {}

    def rounded(cell) -> float:
        # Numeric cell rounded to four decimals; blanks and non-numerics count as 0.0.
        return round(_number(cell) or 0.0, 4)

    for cells in sheet.iter_rows(min_row=5, values_only=True):
        item_id, name, mix_name = _text(cells[1]), _text(cells[2]), _text(cells[3])
        if not (item_id and name and mix_name):
            continue

        sale_type = _normalize_sale_type(cells[4])
        own_bag = _sheet_own_bag_to_model(cells[5])
        std_unit = _number(cells[6]) or 1.0
        bag_cost = rounded(cells[15])
        freight_cost = rounded(cells[16])
        finished_cost = rounded(cells[17])

        variants = cost_variants.setdefault((sale_type, own_bag, std_unit), Counter())
        variants[(bag_cost, freight_cost)] += 1

        parsed.append(
            {
                "client_name": _text(cells[0]) or "General",
                "item_id": item_id,
                "name": name,
                "mix_name": mix_name,
                "sale_type": sale_type,
                "own_bag": own_bag,
                "std_unit": std_unit,
                "items_per_pallet": int(round(_number(cells[7]) or 1)),
                "grading_cost": rounded(cells[12]),
                "bagging_cost": rounded(cells[13]),
                "cracking_cost": rounded(cells[14]),
                "bag_cost": bag_cost,
                "freight_cost": freight_cost,
                "finished_product_delivered": finished_cost,
                "distributor_margin": _derive_margin(finished_cost, cells[19]),
                "wholesale_margin": _derive_margin(finished_cost, cells[20]),
                "process_label": _text(cells[8]),
                "sheet_own_bag": _text(cells[5]),
            }
        )

    for product in parsed:
        unit_label = _build_base_unit_label(product["sale_type"], product["std_unit"], product["own_bag"])
        variants = cost_variants[(product["sale_type"], product["own_bag"], product["std_unit"])]
        if len(variants) > 1:
            most_common_variant = variants.most_common(1)[0][0]
            if (product["bag_cost"], product["freight_cost"]) != most_common_variant:
                client = product["client_name"]
                if product["sheet_own_bag"] == "Yes":
                    qualifier = "Own Bag"
                elif client == "Uncategorized":
                    qualifier = "Bulk"
                else:
                    # Covers every other client, "Peckish" included, by name.
                    qualifier = client
                unit_label = f"{unit_label} ({qualifier})"

        process_key = _build_process_key(
            product["process_label"],
            product["grading_cost"],
            product["bagging_cost"],
            product["cracking_cost"],
        )
        product["unit_of_measure"] = unit_label
        product["bagging_process"] = process_key
    return parsed
def _upsert_raw_materials(db, rows: list[dict]) -> dict[str, RawMaterial]:
    """Create or update this tenant's raw materials and their active price version.

    Returns a mapping of material name to ``RawMaterial`` covering both the
    materials that already existed and the ones created here.
    """
    seed_note = "Seeded from Input Cost Spreadsheet(1).xlsx"
    tenant_materials = select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)
    materials_by_name = {material.name: material for material in db.scalars(tenant_materials).all()}

    for row in rows:
        name = row["name"]
        material = materials_by_name.get(name)
        if material is not None:
            material.unit_of_measure = row["unit_of_measure"]
            material.kg_per_unit = row["kg_per_unit"]
            material.status = "active"
            material.notes = seed_note
        else:
            material = RawMaterial(
                tenant_id=TENANT_ID,
                name=name,
                supplier="Workbook Import",
                unit_of_measure=row["unit_of_measure"],
                kg_per_unit=row["kg_per_unit"],
                status="active",
                notes=seed_note,
            )
            db.add(material)
            db.flush()
            materials_by_name[name] = material

        active_price = next((price for price in material.price_versions if price.status == "active"), None)
        market_value = row["market_value"]
        has_positive_value = market_value is not None and market_value > 0

        if has_positive_value and active_price is None:
            material.price_versions.append(
                RawMaterialPriceVersion(
                    tenant_id=TENANT_ID,
                    market_value=market_value,
                    waste_percentage=row["waste_percentage"],
                    effective_date=WORKBOOK_EFFECTIVE_DATE,
                    status="active",
                    notes=seed_note,
                )
            )
        elif has_positive_value:
            active_price.market_value = market_value
            active_price.waste_percentage = row["waste_percentage"]
            active_price.effective_date = WORKBOOK_EFFECTIVE_DATE
            active_price.status = "active"
            active_price.notes = seed_note
        elif active_price is not None and active_price.market_value <= 0:
            # Only a stored price that is itself non-positive is deactivated;
            # a positive stored price is left untouched.
            active_price.status = "inactive"
            active_price.notes = "Disabled during workbook import because market value was non-positive"

    db.flush()
    return materials_by_name
def _upsert_process_rules(db, products: list[dict]) -> None:
    """Create or update one ``ProcessCostRule`` per distinct ``bagging_process`` key.

    Products without a process key are ignored. When several products share
    a key, the last one processed determines the stored costs.
    """
    tenant_rules = select(ProcessCostRule).where(ProcessCostRule.tenant_id == TENANT_ID)
    rules_by_name = {rule.process_name: rule for rule in db.scalars(tenant_rules).all()}

    for product in products:
        key = product["bagging_process"]
        if not key:
            continue
        costs = {
            "grading_cost": product["grading_cost"],
            "bagging_cost": product["bagging_cost"],
            "cracking_cost": product["cracking_cost"],
        }
        rule = rules_by_name.get(key)
        if rule is not None:
            for field_name, amount in costs.items():
                setattr(rule, field_name, amount)
            continue
        rule = ProcessCostRule(tenant_id=TENANT_ID, process_name=key, **costs)
        db.add(rule)
        rules_by_name[key] = rule
def _upsert_packaging_and_freight_rules(db, products: list[dict]) -> None:
    """Create or update packaging and freight cost rules derived from the product rows.

    Packaging rules are keyed by (sale type, unit of measure, own bag) and
    freight rules by (sale type, unit of measure). When several products map
    to the same key, the last one processed determines the stored cost.
    """
    tenant_packaging = select(PackagingCostRule).where(PackagingCostRule.tenant_id == TENANT_ID)
    tenant_freight = select(FreightCostRule).where(FreightCostRule.tenant_id == TENANT_ID)
    packaging_by_key = {
        (rule.sale_type, rule.unit_of_measure, rule.own_bag): rule
        for rule in db.scalars(tenant_packaging).all()
    }
    freight_by_key = {
        (rule.sale_type, rule.unit_of_measure): rule
        for rule in db.scalars(tenant_freight).all()
    }

    for product in products:
        sale_type = product["sale_type"]
        unit = product["unit_of_measure"]
        own_bag = product["own_bag"]

        packaging = packaging_by_key.get((sale_type, unit, own_bag))
        if packaging is not None:
            packaging.bag_cost = product["bag_cost"]
        else:
            packaging = PackagingCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit,
                own_bag=own_bag,
                bag_cost=product["bag_cost"],
            )
            db.add(packaging)
            packaging_by_key[(sale_type, unit, own_bag)] = packaging

        freight = freight_by_key.get((sale_type, unit))
        if freight is not None:
            freight.cost_per_unit = product["freight_cost"]
        else:
            freight = FreightCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit,
                cost_per_unit=product["freight_cost"],
            )
            db.add(freight)
            freight_by_key[(sale_type, unit)] = freight
def _upsert_mix(
    db,
    *,
    client_name: str,
    mix_name: str,
    ingredients: list[dict],
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Find or create the mix for (client, name) and sync its ingredient rows.

    The mix is looked up in ``mix_cache`` first, then in the database, and is
    created only if neither has it. Ingredients naming a raw material missing
    from ``raw_material_map`` are skipped; existing ingredient rows that are
    not in the desired set are deleted.
    """
    cache_key = (client_name, mix_name)
    mix = mix_cache.get(cache_key)
    if mix is None:
        mix = db.scalar(
            select(Mix).where(
                Mix.tenant_id == TENANT_ID,
                Mix.client_name == client_name,
                Mix.name == mix_name,
            )
        )
    if mix is None:
        mix = Mix(
            tenant_id=TENANT_ID,
            client_name=client_name,
            name=mix_name,
            status="active",
            version=1,
            notes="Seeded from Input Cost Spreadsheet(1).xlsx",
        )
        db.add(mix)
        # Flushed before mix.id is read for the ingredient rows below.
        db.flush()
    mix_cache[cache_key] = mix

    mix_ingredients = select(MixIngredient).where(MixIngredient.mix_id == mix.id)
    current = {entry.raw_material_id: entry for entry in db.scalars(mix_ingredients).all()}

    wanted_ids = set()
    for spec in ingredients:
        material = raw_material_map.get(spec["raw_material_name"])
        if material is None:
            continue
        wanted_ids.add(material.id)
        existing = current.get(material.id)
        if existing is not None:
            existing.quantity_kg = spec["quantity_kg"]
            continue
        db.add(
            MixIngredient(
                tenant_id=TENANT_ID,
                mix_id=mix.id,
                raw_material_id=material.id,
                quantity_kg=spec["quantity_kg"],
            )
        )

    for material_id, stale in current.items():
        if material_id not in wanted_ids:
            db.delete(stale)
    return mix
def _ensure_single_material_mix(
    db,
    *,
    client_name: str,
    raw_material_name: str,
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Upsert a mix named after a raw material that contains only that material.

    The single ingredient's quantity is the material's ``kg_per_unit``, or 1.0
    when that is unset or zero.

    Raises:
        KeyError: if ``raw_material_name`` is not in ``raw_material_map``.
    """
    material = raw_material_map[raw_material_name]
    sole_ingredient = {
        "raw_material_name": raw_material_name,
        "quantity_kg": material.kg_per_unit or 1.0,
    }
    return _upsert_mix(
        db,
        client_name=client_name,
        mix_name=raw_material_name,
        ingredients=[sole_ingredient],
        raw_material_map=raw_material_map,
        mix_cache=mix_cache,
    )
def _upsert_products(db, products: list[dict], mix_lookup: dict[tuple[str, str], Mix], raw_material_map: dict[str, RawMaterial]) -> None:
    """Create or update this tenant's products, keyed by ``item_id``.

    Each product's mix is resolved in this order:

    1. the exact (client name, mix name) entry in ``mix_lookup``;
    2. the only mix with that name, when exactly one exists in ``mix_lookup``;
    3. a single-ingredient mix created on the fly when the mix name matches a
       raw material name.

    Products whose mix cannot be resolved are skipped, and a warning is
    logged for each so they do not vanish from the import unnoticed.
    """
    # Work on a copy so mixes created on the fly do not leak into the caller's mapping.
    mix_cache = dict(mix_lookup)
    mixes_by_name: dict[str, list[Mix]] = {}
    for mix in mix_cache.values():
        mixes_by_name.setdefault(mix.name, []).append(mix)

    existing_products = {
        product.item_id: product
        for product in db.scalars(select(Product).where(Product.tenant_id == TENANT_ID)).all()
        if product.item_id
    }

    for row in products:
        mix = mix_cache.get((row["client_name"], row["mix_name"]))
        if mix is None:
            named_mixes = mixes_by_name.get(row["mix_name"], [])
            # Only fall back to a name-only match when it is unambiguous.
            if len(named_mixes) == 1:
                mix = named_mixes[0]
        if mix is None and row["mix_name"] in raw_material_map:
            mix = _ensure_single_material_mix(
                db,
                client_name=row["client_name"],
                raw_material_name=row["mix_name"],
                raw_material_map=raw_material_map,
                mix_cache=mix_cache,
            )
        if mix is None:
            logger.warning(
                "Skipping product %s (%s): mix %r could not be resolved for client %r",
                row["item_id"],
                row["name"],
                row["mix_name"],
                row["client_name"],
            )
            continue

        fields = {
            "client_name": row["client_name"],
            "name": row["name"],
            "mix_id": mix.id,
            "sale_type": row["sale_type"],
            "own_bag": row["own_bag"],
            "unit_of_measure": row["unit_of_measure"],
            "items_per_pallet": row["items_per_pallet"],
            "bagging_process": row["bagging_process"],
            "distributor_margin": row["distributor_margin"],
            "wholesale_margin": row["wholesale_margin"],
            "notes": "Seeded from Input Cost Spreadsheet(1).xlsx",
        }
        product = existing_products.get(row["item_id"])
        if product is None:
            product = Product(tenant_id=TENANT_ID, item_id=row["item_id"], **fields)
            db.add(product)
            existing_products[row["item_id"]] = product
        else:
            for attribute, value in fields.items():
                setattr(product, attribute, value)
def seed_client_access(db):
    """Seed two client accounts with users, feature flags, module permissions and an audit event.

    Returns without touching the database when any ``ClientAccount`` row
    already exists.
    """
    if db.scalar(select(ClientAccount.id)) is not None:
        return

    primary_account = ClientAccount(
        tenant_id=TENANT_ID,
        name="Hunter Premium Produce",
        client_code="HPP",
        status="active",
        powerbi_workspace="hunter-premium-produce-prod",
        notes="Primary production client for the Lean 101 admin and access workflows",
    )
    onboarding_account = ClientAccount(
        tenant_id="loft-grains",
        name="Loft Grains",
        client_code="LOFT",
        status="onboarding",
        powerbi_workspace="farm-ops-sandbox",
        notes="Onboarding workspace used to test staged user enablement",
    )
    db.add_all([primary_account, onboarding_account])
    # Flushed before the accounts' id values are read further down.
    db.flush()

    primary_users = [
        ClientUser(
            tenant_id=primary_account.tenant_id,
            full_name="Amelia Hart",
            email="operator@example.com",
            role="superadmin",
            status="active",
            is_new_user=False,
            last_login_at=datetime(2026, 4, 24, 11, 30),
        ),
        ClientUser(
            tenant_id=primary_account.tenant_id,
            full_name="Ethan Cole",
            email="ethan.cole@hunterpremiumproduce.example",
            role="operator",
            status="invited",
            is_new_user=True,
        ),
    ]
    onboarding_users = [
        ClientUser(
            tenant_id=onboarding_account.tenant_id,
            full_name="Ruby Singh",
            email="ruby.singh@loftgrains.example",
            role="viewer",
            status="active",
            is_new_user=False,
            last_login_at=datetime(2026, 4, 22, 9, 10),
        )
    ]
    primary_account.users.extend(primary_users)
    onboarding_account.users.extend(onboarding_users)

    # Feature keys switched on per tenant; every other catalog entry is stored disabled.
    enabled_by_tenant = {
        TENANT_ID: {"dashboard", "raw_materials", "mix_master", "mix_calculator", "products", "scenarios", "powerbi_export", "client_access"},
        "loft-grains": {"dashboard", "mix_calculator", "products", "powerbi_export"},
    }
    for account in (primary_account, onboarding_account):
        enabled = enabled_by_tenant[account.tenant_id]
        account.features.extend(
            [
                ClientFeatureAccess(
                    tenant_id=account.tenant_id,
                    feature_key=key,
                    feature_name=label,
                    feature_group=group,
                    description=description,
                    enabled=key in enabled,
                )
                for key, label, group, description in MODULE_CATALOG
            ]
        )
        for user in account.users:
            user.module_permissions.extend(
                [
                    ClientUserModulePermission(
                        tenant_id=account.tenant_id,
                        client_account_id=account.id,
                        module_key=module_key,
                        access_level=default_access_level_for_role(user.role, module_key),
                    )
                    for module_key, _, _, _ in MODULE_CATALOG
                ]
            )

    seeded_event = ClientAccessAuditEvent(
        tenant_id=primary_account.tenant_id,
        actor_type="seed",
        actor_name="Lean 101 Seeder",
        actor_email="system@lean101.local",
        actor_role="system",
        action="client_access.seeded",
        target_type="client_account",
        target_id=primary_account.id,
        module_key="client_access",
        summary="Initial client access controls, module permissions, and feature flags were seeded.",
    )
    primary_account.audit_events.append(seeded_event)
def seed_costing_workspace(db):
    """Import raw materials, cost rules, mixes and products from the workbook.

    All three worksheets are parsed before anything is written to ``db``.
    Nothing is committed here; that is left to the caller.
    """
    workbook = _load_workbook()
    material_rows = _read_raw_material_rows(workbook)
    mix_rows_by_key = _read_mix_rows(workbook)
    product_rows = _read_product_rows(workbook)

    materials_by_name = _upsert_raw_materials(db, material_rows)
    _upsert_process_rules(db, product_rows)
    _upsert_packaging_and_freight_rules(db, product_rows)

    mixes: dict[tuple[str, str], Mix] = {}
    for recipe in mix_rows_by_key.values():
        recipe_key = (recipe["client_name"], recipe["name"])
        mixes[recipe_key] = _upsert_mix(
            db,
            client_name=recipe["client_name"],
            mix_name=recipe["name"],
            ingredients=recipe["ingredients"],
            raw_material_map=materials_by_name,
            mix_cache=mixes,
        )

    _upsert_products(db, product_rows, mixes, materials_by_name)
def seed_if_empty():
    """Create the schema, then run every seeder inside one session and commit.

    The costing workspace import runs whenever the workbook file exists;
    otherwise a warning is logged and only the access seeders run.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        if not WORKBOOK_PATH.exists():
            logger.warning("Skipping costing workspace seed because workbook is missing at %s", WORKBOOK_PATH)
        else:
            seed_costing_workspace(session)
        seed_client_access(session)
        seed_access(session)
        session.commit()
# Run the seeders when this module is executed as a script.
if __name__ == "__main__":
    seed_if_empty()