"""Seed the costing workspace and client-access tables from the input cost workbook."""

from __future__ import annotations

import math
import re
from collections import Counter
from datetime import date, datetime
from pathlib import Path

from openpyxl import load_workbook
from sqlalchemy import select

from app.db.session import Base, SessionLocal, engine
from app.models.assumption import FreightCostRule, PackagingCostRule, ProcessCostRule
from app.models.client_access import (
    ClientAccessAuditEvent,
    ClientAccount,
    ClientFeatureAccess,
    ClientUser,
    ClientUserModulePermission,
)
from app.models.mix import Mix, MixIngredient
from app.models.product import Product
from app.models.raw_material import RawMaterial, RawMaterialPriceVersion
from app.services.client_access_service import MODULE_CATALOG, default_access_level_for_role

TENANT_ID = "hunter-premium-produce"
WORKBOOK_EFFECTIVE_DATE = date(2025, 9, 1)
WORKBOOK_SENTINEL_ITEM_ID = "404266"
WORKBOOK_PATH = Path(__file__).resolve().parents[2] / "Input Cost Spreadsheet(1).xlsx"

# Lower-cased cell contents that mean "no value". Besides the hand-typed
# placeholders this covers every Excel error literal, because a workbook loaded
# with data_only=True surfaces error cells as these strings.
_BLANK_TOKENS = frozenset(
    {
        "n/a",
        "na",
        "none",
        "#n/a",
        "#value!",
        "#div/0!",
        "#ref!",
        "#name?",
        "#num!",
        "#null!",
    }
)

# One or more characters that are not allowed inside a slug.
_NON_SLUG_RUN = re.compile(r"[^a-z0-9]+")


def _text(value) -> str | None:
    """Return a cell value as stripped text, or ``None`` when it is blank.

    Strings that are empty after stripping, or that match a placeholder or
    Excel error token (case-insensitive), count as blank. Non-string values
    are converted with ``str`` and are not checked against the tokens.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        return str(value).strip() or None
    normalized = value.strip()
    if not normalized or normalized.lower() in _BLANK_TOKENS:
        return None
    return normalized


def _number(value) -> float | None:
    """Return a cell value as a finite float, or ``None`` when it is not one.

    Accepts bools, ints, floats and numeric strings (thousands separators
    ``,`` are removed). Blank/placeholder strings, unparsable strings,
    unsupported types and non-finite results all yield ``None``.
    """
    if value is None:
        return None
    if isinstance(value, (bool, int, float)):
        numeric = float(value)
    elif isinstance(value, str):
        normalized = value.strip().replace(",", "")
        if not normalized or normalized.lower() in _BLANK_TOKENS:
            return None
        try:
            numeric = float(normalized)
        except ValueError:
            return None
    else:
        return None
    # float() happily parses "nan"/"inf"; NaN is truthy, so callers' `or 0.0`
    # fallbacks would not catch it. Reject non-finite values here instead.
    return numeric if math.isfinite(numeric) else None


def _format_quantity(value: float | int | None) -> str:
    """Format a quantity without trailing zeros; ``None`` formats as ``"0"``.

    Values within 1e-9 of an integer are printed as that integer; anything
    else is printed with at most four decimal places.
    """
    if value is None:
        return "0"
    numeric = float(value)
    nearest = round(numeric)
    if abs(numeric - nearest) < 1e-9:
        return str(int(nearest))
    return f"{numeric:.4f}".rstrip("0").rstrip(".")


def _slug(value: str | None, *, fallback: str) -> str:
    """Lower-case ``value`` and collapse non-alphanumeric runs to ``_``.

    ``fallback`` is used when ``value`` is blank and is returned as-is when
    the resulting slug would be empty.
    """
    base = _text(value) or fallback
    slug = _NON_SLUG_RUN.sub("_", base.lower()).strip("_")
    return slug or fallback


def _normalize_sale_type(value) -> str:
    """Return the sale-type key for a sheet label (blank becomes ``"standard"``).

    Delegates to ``_slug`` so stray punctuation around the label cannot
    produce keys such as ``"bulka_"`` that miss the known sale types.
    """
    return _slug(value, fallback="standard")


def _sheet_own_bag_to_model(value) -> bool:
    """Return ``True`` only when the sheet label is ``"no bag"`` (any case)."""
    # NOTE(review): the sheet also appears to use "Yes" in this column (see the
    # "(Own Bag)" label suffix in _read_product_rows), which maps to False
    # here. Confirm that is the intended meaning of the model's own_bag flag.
    return (_text(value) or "").lower() == "no bag"


def _normalize_raw_material_unit(unit_label, kg_per_unit: float | None) -> str:
    """Map a sheet unit label and pack weight to a unit-of-measure string.

    Falls back to the original label text, or ``"kg"`` when the label is
    blank and no rule matches.
    """
    label = (_text(unit_label) or "").lower()
    if label in {"per ton", "per tonne", "ton", "tonne"}:
        return "tonne"
    if label == "kg":
        return "kg"
    if label == "per bag 20kg" or "20 kg" in label:
        return "20kg bag"
    if "kg" in label and kg_per_unit:
        return f"{_format_quantity(kg_per_unit)}kg bag"
    if kg_per_unit == 1000:
        return "tonne"
    return _text(unit_label) or "kg"


def _build_base_unit_label(sale_type: str, std_unit: float, own_bag: bool) -> str:
    """Build the unit label for a product before any variant suffix is added."""
    quantity = _format_quantity(std_unit)
    if sale_type == "standard":
        return f"{quantity}kg no bag" if own_bag else f"{quantity}kg bag"
    if sale_type == "bulka":
        return f"{quantity}kg bulka"
    if sale_type == "per_unit":
        return f"{quantity} unit"
    return f"{quantity}kg"


def _derive_margin(finished_cost: float, sell_price) -> float | None:
    """Return ``1 - cost / price`` rounded to six places, or ``None``.

    ``None`` is returned when the price is missing or non-positive, the cost
    is non-positive, or the price does not exceed the cost.
    """
    price = _number(sell_price)
    if price is None or price <= 0 or finished_cost <= 0 or price <= finished_cost:
        return None
    margin = 1 - (finished_cost / price)
    # Guards against float rounding pushing the ratio onto 0 or 1.
    if margin <= 0 or margin >= 1:
        return None
    return round(margin, 6)


def _build_process_key(label, grading_cost: float, bagging_cost: float, cracking_cost: float) -> str | None:
    """Return a process-rule key encoding the label and its three costs.

    Costs are encoded in thousandths. Returns ``None`` when all three costs
    are (numerically) zero, i.e. the product has no processing step.
    """
    costs = (grading_cost, bagging_cost, cracking_cost)
    if all(abs(cost) < 1e-9 for cost in costs):
        return None
    base = _slug(label, fallback="custom_process")
    return (
        f"{base}"
        f"_g{int(round(grading_cost * 1000))}"
        f"_b{int(round(bagging_cost * 1000))}"
        f"_c{int(round(cracking_cost * 1000))}"
    )


def _load_workbook():
    """Open the seed workbook with cached formula values.

    Raises:
        FileNotFoundError: if ``WORKBOOK_PATH`` does not exist.
    """
    if not WORKBOOK_PATH.exists():
        raise FileNotFoundError(f"Workbook not found at {WORKBOOK_PATH}")
    return load_workbook(WORKBOOK_PATH, data_only=True)


# Note stamped on every record created or refreshed by the workbook import.
_SEED_NOTE = "Seeded from Input Cost Spreadsheet(1).xlsx"


def _read_raw_material_rows(workbook) -> list[dict]:
    """Read raw-material rows from the "C- Raw Products Costs" sheet.

    Rows start at sheet row 3. A row is skipped when it has no name or when
    both the market value (tuple index 1) and cost per kg (index 7) are
    missing. A missing or non-positive kg-per-unit defaults to 1.0, and a
    missing market value is derived from cost per kg times kg per unit.
    """
    rows: list[dict] = []
    worksheet = workbook["C- Raw Products Costs"]
    for row in worksheet.iter_rows(min_row=3, values_only=True):
        name = _text(row[0])
        if not name:
            continue
        market_value = _number(row[1])
        cost_per_kg = _number(row[7])
        if market_value is None and cost_per_kg is None:
            continue
        kg_per_unit = _number(row[3])
        if kg_per_unit is None or kg_per_unit <= 0:
            kg_per_unit = 1.0
        if market_value is None:
            # cost_per_kg is guaranteed to be set here by the guard above.
            market_value = cost_per_kg * kg_per_unit
        rows.append(
            {
                "name": name,
                "unit_of_measure": _normalize_raw_material_unit(row[2], kg_per_unit),
                "kg_per_unit": kg_per_unit,
                "market_value": round(market_value, 4),
                "waste_percentage": _number(row[4]) or 0.0,
            }
        )
    return rows


def _read_mix_rows(workbook) -> dict[tuple[str, str], dict]:
    """Read mix recipes from the "M - All" sheet, keyed by (client, mix name).

    The header row names one ingredient per column from tuple index 3
    onwards; each data row holds the quantity for that ingredient in the same
    column. When several rows share a key, the one with the highest score
    (ingredient count, whether a total is declared, total kg) wins.
    """
    worksheet = workbook["M - All"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True), ())
    # Remember each ingredient's real column index. Compacting the header list
    # and zipping it against contiguous cells would shift every ingredient
    # after a blank header onto the wrong quantity column.
    ingredient_columns: list[tuple[int, str]] = []
    for column_index, header in enumerate(header_row[3:], start=3):
        ingredient_name = _text(header)
        if ingredient_name:
            ingredient_columns.append((column_index, ingredient_name))

    best_rows: dict[tuple[str, str], dict] = {}
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        mix_name = _text(row[1])
        if not client_name or not mix_name:
            continue

        ingredients = []
        for column_index, ingredient_name in ingredient_columns:
            if column_index >= len(row):
                break
            quantity = _number(row[column_index])
            if quantity and quantity > 0:
                ingredients.append({"raw_material_name": ingredient_name, "quantity_kg": quantity})
        if not ingredients:
            continue

        declared_total = _number(row[2])
        total_kg = declared_total or round(sum(item["quantity_kg"] for item in ingredients), 4)
        score = (len(ingredients), 1 if declared_total is not None else 0, total_kg)
        key = (client_name, mix_name)
        current = best_rows.get(key)
        if current is None or score > current["score"]:
            best_rows[key] = {
                "client_name": client_name,
                "name": mix_name,
                "ingredients": ingredients,
                "total_kg": total_kg,
                "score": score,
            }
    return best_rows


def _read_product_rows(workbook) -> list[dict]:
    """Read product rows from the "Product Cost - Price" sheet.

    Rows start at sheet row 5 and are skipped unless item id, name and mix
    name are all present. Each returned dict also carries a derived
    ``unit_of_measure`` label and ``bagging_process`` key.

    Products sharing (sale type, own bag, standard unit) but carrying a
    bag/freight cost pair other than the most common one get a suffix on
    their unit label so they do not collide with the primary variant.
    """
    worksheet = workbook["Product Cost - Price"]
    raw_rows: list[dict] = []
    unit_variants: dict[tuple[str, bool, float], Counter[tuple[float, float]]] = {}
    for row in worksheet.iter_rows(min_row=5, values_only=True):
        item_id = _text(row[1])
        name = _text(row[2])
        mix_name = _text(row[3])
        if not item_id or not name or not mix_name:
            continue
        sale_type = _normalize_sale_type(row[4])
        own_bag = _sheet_own_bag_to_model(row[5])
        std_unit = _number(row[6]) or 1.0
        bag_cost = round(_number(row[15]) or 0.0, 4)
        freight_cost = round(_number(row[16]) or 0.0, 4)
        finished_cost = round(_number(row[17]) or 0.0, 4)
        unit_variants.setdefault((sale_type, own_bag, std_unit), Counter())[(bag_cost, freight_cost)] += 1
        raw_rows.append(
            {
                "client_name": _text(row[0]) or "General",
                "item_id": item_id,
                "name": name,
                "mix_name": mix_name,
                "sale_type": sale_type,
                "own_bag": own_bag,
                "std_unit": std_unit,
                "items_per_pallet": int(round(_number(row[7]) or 1)),
                "grading_cost": round(_number(row[12]) or 0.0, 4),
                "bagging_cost": round(_number(row[13]) or 0.0, 4),
                "cracking_cost": round(_number(row[14]) or 0.0, 4),
                "bag_cost": bag_cost,
                "freight_cost": freight_cost,
                "finished_product_delivered": finished_cost,
                "distributor_margin": _derive_margin(finished_cost, row[19]),
                "wholesale_margin": _derive_margin(finished_cost, row[20]),
                "process_label": _text(row[8]),
                "sheet_own_bag": _text(row[5]),
            }
        )

    products: list[dict] = []
    for row in raw_rows:
        unit_label = _build_base_unit_label(row["sale_type"], row["std_unit"], row["own_bag"])
        variant_counts = unit_variants[(row["sale_type"], row["own_bag"], row["std_unit"])]
        if len(variant_counts) > 1:
            primary_variant = variant_counts.most_common(1)[0][0]
            if (row["bag_cost"], row["freight_cost"]) != primary_variant:
                if row["sheet_own_bag"] == "Yes":
                    suffix = "Own Bag"
                elif row["client_name"] == "Uncategorized":
                    suffix = "Bulk"
                else:
                    suffix = row["client_name"]
                unit_label = f"{unit_label} ({suffix})"
        row["unit_of_measure"] = unit_label
        row["bagging_process"] = _build_process_key(
            row["process_label"],
            row["grading_cost"],
            row["bagging_cost"],
            row["cracking_cost"],
        )
        products.append(row)
    return products


def _upsert_raw_materials(db, rows: list[dict]) -> dict[str, RawMaterial]:
    """Create or refresh the tenant's raw materials and their active price.

    Returns:
        All of the tenant's raw materials (existing and new) keyed by name.
    """
    existing_map = {
        material.name: material
        for material in db.scalars(select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)).all()
    }
    for row in rows:
        material = existing_map.get(row["name"])
        if material is None:
            material = RawMaterial(
                tenant_id=TENANT_ID,
                name=row["name"],
                supplier="Workbook Import",
                unit_of_measure=row["unit_of_measure"],
                kg_per_unit=row["kg_per_unit"],
                status="active",
                notes=_SEED_NOTE,
            )
            db.add(material)
            # Flush so the new material has an id before mixes reference it.
            db.flush()
            existing_map[row["name"]] = material
        else:
            material.unit_of_measure = row["unit_of_measure"]
            material.kg_per_unit = row["kg_per_unit"]
            material.status = "active"
            material.notes = _SEED_NOTE

        active_price = next((price for price in material.price_versions if price.status == "active"), None)
        has_usable_price = row["market_value"] is not None and row["market_value"] > 0
        if has_usable_price and active_price is None:
            material.price_versions.append(
                RawMaterialPriceVersion(
                    tenant_id=TENANT_ID,
                    market_value=row["market_value"],
                    waste_percentage=row["waste_percentage"],
                    effective_date=WORKBOOK_EFFECTIVE_DATE,
                    status="active",
                    notes=_SEED_NOTE,
                )
            )
        elif has_usable_price:
            active_price.market_value = row["market_value"]
            active_price.waste_percentage = row["waste_percentage"]
            active_price.effective_date = WORKBOOK_EFFECTIVE_DATE
            active_price.status = "active"
            active_price.notes = _SEED_NOTE
        elif active_price is not None and active_price.market_value <= 0:
            # The workbook has no usable price and the stored one is invalid too.
            active_price.status = "inactive"
            active_price.notes = "Disabled during workbook import because market value was non-positive"
    db.flush()
    return existing_map


def _upsert_process_rules(db, products: list[dict]) -> None:
    """Create or update one process-cost rule per distinct ``bagging_process`` key."""
    existing_rules = {
        rule.process_name: rule
        for rule in db.scalars(select(ProcessCostRule).where(ProcessCostRule.tenant_id == TENANT_ID)).all()
    }
    for product in products:
        process_name = product["bagging_process"]
        if not process_name:
            continue
        rule = existing_rules.get(process_name)
        if rule is None:
            rule = ProcessCostRule(
                tenant_id=TENANT_ID,
                process_name=process_name,
                grading_cost=product["grading_cost"],
                bagging_cost=product["bagging_cost"],
                cracking_cost=product["cracking_cost"],
            )
            db.add(rule)
            existing_rules[process_name] = rule
        else:
            rule.grading_cost = product["grading_cost"]
            rule.bagging_cost = product["bagging_cost"]
            rule.cracking_cost = product["cracking_cost"]


def _upsert_packaging_and_freight_rules(db, products: list[dict]) -> None:
    """Create or update packaging and freight cost rules from the product rows.

    Packaging rules are keyed by (sale type, unit of measure, own bag) and
    freight rules by (sale type, unit of measure). When several products map
    to the same key the last one processed determines the stored cost.
    """
    packaging_rules = {
        (rule.sale_type, rule.unit_of_measure, rule.own_bag): rule
        for rule in db.scalars(select(PackagingCostRule).where(PackagingCostRule.tenant_id == TENANT_ID)).all()
    }
    freight_rules = {
        (rule.sale_type, rule.unit_of_measure): rule
        for rule in db.scalars(select(FreightCostRule).where(FreightCostRule.tenant_id == TENANT_ID)).all()
    }
    for product in products:
        sale_type = product["sale_type"]
        unit_of_measure = product["unit_of_measure"]

        packaging_key = (sale_type, unit_of_measure, product["own_bag"])
        packaging_rule = packaging_rules.get(packaging_key)
        if packaging_rule is None:
            packaging_rule = PackagingCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit_of_measure,
                own_bag=product["own_bag"],
                bag_cost=product["bag_cost"],
            )
            db.add(packaging_rule)
            packaging_rules[packaging_key] = packaging_rule
        else:
            packaging_rule.bag_cost = product["bag_cost"]

        freight_key = (sale_type, unit_of_measure)
        freight_rule = freight_rules.get(freight_key)
        if freight_rule is None:
            freight_rule = FreightCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit_of_measure,
                cost_per_unit=product["freight_cost"],
            )
            db.add(freight_rule)
            freight_rules[freight_key] = freight_rule
        else:
            freight_rule.cost_per_unit = product["freight_cost"]


def _upsert_mix(
    db,
    *,
    client_name: str,
    mix_name: str,
    ingredients: list[dict],
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Create or fetch a mix and make its ingredients match ``ingredients``.

    The mix is looked up in ``mix_cache`` first, then in the database, and is
    created if neither has it; the result is stored back into ``mix_cache``.
    Ingredient rows naming a raw material absent from ``raw_material_map``
    are skipped, and stored ingredients no longer listed are deleted.
    """
    key = (client_name, mix_name)
    mix = mix_cache.get(key)
    if mix is None:
        mix = db.scalar(
            select(Mix).where(
                Mix.tenant_id == TENANT_ID,
                Mix.client_name == client_name,
                Mix.name == mix_name,
            )
        )
    if mix is None:
        mix = Mix(
            tenant_id=TENANT_ID,
            client_name=client_name,
            name=mix_name,
            status="active",
            version=1,
            notes=_SEED_NOTE,
        )
        db.add(mix)
        # Flush so mix.id is available for the ingredient rows below.
        db.flush()
    mix_cache[key] = mix

    existing_ingredients = {
        ingredient.raw_material_id: ingredient
        for ingredient in db.scalars(select(MixIngredient).where(MixIngredient.mix_id == mix.id)).all()
    }
    desired_ids = set()
    for ingredient_row in ingredients:
        raw_material = raw_material_map.get(ingredient_row["raw_material_name"])
        if raw_material is None:
            continue
        desired_ids.add(raw_material.id)
        ingredient = existing_ingredients.get(raw_material.id)
        if ingredient is None:
            db.add(
                MixIngredient(
                    tenant_id=TENANT_ID,
                    mix_id=mix.id,
                    raw_material_id=raw_material.id,
                    quantity_kg=ingredient_row["quantity_kg"],
                )
            )
        else:
            ingredient.quantity_kg = ingredient_row["quantity_kg"]

    for raw_material_id, ingredient in existing_ingredients.items():
        if raw_material_id not in desired_ids:
            db.delete(ingredient)
    return mix


def _ensure_single_material_mix(
    db,
    *,
    client_name: str,
    raw_material_name: str,
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Upsert a one-ingredient mix named after a raw material.

    The single ingredient's quantity is the material's ``kg_per_unit`` (1.0
    when that is unset or zero).

    Raises:
        KeyError: if ``raw_material_name`` is not in ``raw_material_map``.
    """
    raw_material = raw_material_map[raw_material_name]
    return _upsert_mix(
        db,
        client_name=client_name,
        mix_name=raw_material_name,
        ingredients=[
            {
                "raw_material_name": raw_material_name,
                "quantity_kg": raw_material.kg_per_unit or 1.0,
            }
        ],
        raw_material_map=raw_material_map,
        mix_cache=mix_cache,
    )


def _upsert_products(
    db,
    products: list[dict],
    mix_lookup: dict[tuple[str, str], Mix],
    raw_material_map: dict[str, RawMaterial],
) -> None:
    """Create or update the tenant's products, matched on ``item_id``.

    The mix for each product is resolved in this order: exact
    (client, mix name) match; the only mix with that name across all clients;
    a single-ingredient mix created from a raw material of the same name.
    Products whose mix cannot be resolved are skipped.
    """
    mix_cache = dict(mix_lookup)
    mixes_by_name: dict[str, list[Mix]] = {}
    for mix in mix_cache.values():
        mixes_by_name.setdefault(mix.name, []).append(mix)
    existing_products = {
        product.item_id: product
        for product in db.scalars(select(Product).where(Product.tenant_id == TENANT_ID)).all()
        if product.item_id
    }

    for row in products:
        mix = mix_cache.get((row["client_name"], row["mix_name"]))
        if mix is None:
            named_mixes = mixes_by_name.get(row["mix_name"], [])
            if len(named_mixes) == 1:
                mix = named_mixes[0]
        if mix is None and row["mix_name"] in raw_material_map:
            mix = _ensure_single_material_mix(
                db,
                client_name=row["client_name"],
                raw_material_name=row["mix_name"],
                raw_material_map=raw_material_map,
                mix_cache=mix_cache,
            )
        if mix is None:
            continue

        # The same attributes are written whether the product is new or existing.
        fields = {
            "client_name": row["client_name"],
            "name": row["name"],
            "mix_id": mix.id,
            "sale_type": row["sale_type"],
            "own_bag": row["own_bag"],
            "unit_of_measure": row["unit_of_measure"],
            "items_per_pallet": row["items_per_pallet"],
            "bagging_process": row["bagging_process"],
            "distributor_margin": row["distributor_margin"],
            "wholesale_margin": row["wholesale_margin"],
            "notes": _SEED_NOTE,
        }
        product = existing_products.get(row["item_id"])
        if product is None:
            product = Product(tenant_id=TENANT_ID, item_id=row["item_id"], **fields)
            db.add(product)
            existing_products[row["item_id"]] = product
        else:
            for attribute, value in fields.items():
                setattr(product, attribute, value)


def seed_client_access(db):
    """Seed two client accounts with users, feature flags and permissions.

    Does nothing when any ``ClientAccount`` row already exists.
    """
    existing = db.scalar(select(ClientAccount.id))
    if existing is not None:
        return

    specialty = ClientAccount(
        tenant_id=TENANT_ID,
        name="Hunter Premium Produce",
        client_code="HPP",
        status="active",
        powerbi_workspace="hunter-premium-produce-prod",
        notes="Primary production client for the Lean 101 admin and access workflows",
    )
    loft = ClientAccount(
        tenant_id="loft-grains",
        name="Loft Grains",
        client_code="LOFT",
        status="onboarding",
        powerbi_workspace="farm-ops-sandbox",
        notes="Onboarding workspace used to test staged user enablement",
    )
    db.add_all([specialty, loft])
    # Flush so both accounts have ids before child rows reference them.
    db.flush()

    specialty.users.extend(
        [
            ClientUser(
                tenant_id=specialty.tenant_id,
                full_name="Amelia Hart",
                email="operator@example.com",
                role="superadmin",
                status="active",
                is_new_user=False,
                last_login_at=datetime(2026, 4, 24, 11, 30),
            ),
            ClientUser(
                tenant_id=specialty.tenant_id,
                full_name="Ethan Cole",
                email="ethan.cole@hunterpremiumproduce.example",
                role="operator",
                status="invited",
                is_new_user=True,
            ),
        ]
    )
    loft.users.extend(
        [
            ClientUser(
                tenant_id=loft.tenant_id,
                full_name="Ruby Singh",
                email="ruby.singh@loftgrains.example",
                role="viewer",
                status="active",
                is_new_user=False,
                last_login_at=datetime(2026, 4, 22, 9, 10),
            )
        ]
    )

    enabled_feature_map = {
        TENANT_ID: {
            "dashboard",
            "raw_materials",
            "mix_master",
            "mix_calculator",
            "products",
            "scenarios",
            "powerbi_export",
            "client_access",
        },
        "loft-grains": {"dashboard", "mix_calculator", "products", "powerbi_export"},
    }
    for client in (specialty, loft):
        enabled_keys = enabled_feature_map[client.tenant_id]
        # Every catalog module gets a feature row; only listed ones are enabled.
        for feature_key, feature_name, feature_group, description in MODULE_CATALOG:
            client.features.append(
                ClientFeatureAccess(
                    tenant_id=client.tenant_id,
                    feature_key=feature_key,
                    feature_name=feature_name,
                    feature_group=feature_group,
                    description=description,
                    enabled=feature_key in enabled_keys,
                )
            )
        for user in client.users:
            for module_key, _, _, _ in MODULE_CATALOG:
                user.module_permissions.append(
                    ClientUserModulePermission(
                        tenant_id=client.tenant_id,
                        client_account_id=client.id,
                        module_key=module_key,
                        access_level=default_access_level_for_role(user.role, module_key),
                    )
                )

    specialty.audit_events.append(
        ClientAccessAuditEvent(
            tenant_id=specialty.tenant_id,
            actor_type="seed",
            actor_name="Lean 101 Seeder",
            actor_email="system@lean101.local",
            actor_role="system",
            action="client_access.seeded",
            target_type="client_account",
            target_id=specialty.id,
            module_key="client_access",
            summary="Initial client access controls, module permissions, and feature flags were seeded.",
        )
    )


def seed_costing_workspace(db):
    """Load the workbook and upsert raw materials, cost rules, mixes and products."""
    workbook = _load_workbook()
    raw_material_rows = _read_raw_material_rows(workbook)
    mix_rows = _read_mix_rows(workbook)
    product_rows = _read_product_rows(workbook)

    raw_material_map = _upsert_raw_materials(db, raw_material_rows)
    _upsert_process_rules(db, product_rows)
    _upsert_packaging_and_freight_rules(db, product_rows)

    mix_cache: dict[tuple[str, str], Mix] = {}
    for mix_row in mix_rows.values():
        # _upsert_mix records each mix in mix_cache under (client, name).
        _upsert_mix(
            db,
            client_name=mix_row["client_name"],
            mix_name=mix_row["name"],
            ingredients=mix_row["ingredients"],
            raw_material_map=raw_material_map,
            mix_cache=mix_cache,
        )
    _upsert_products(db, product_rows, mix_cache, raw_material_map)


def seed_if_empty():
    """Create all tables, run both seeders and commit in one session."""
    # NOTE(review): despite the name, only seed_client_access skips when data
    # already exists; the costing upserts run on every call. Confirm intended.
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as db:
        seed_costing_workspace(db)
        seed_client_access(db)
        db.commit()


if __name__ == "__main__":
    seed_if_empty()