# backend/app/seed.py

from __future__ import annotations

from collections import Counter
from datetime import date, datetime
import logging
import os
from pathlib import Path
import re

from openpyxl import load_workbook
from sqlalchemy import func, select
from sqlalchemy.orm import selectinload

from app.db.session import Base, SessionLocal, engine
from app.models.assumption import FreightCostRule, PackagingCostRule, ProcessCostRule
from app.models.client_access import ClientAccessAuditEvent, ClientAccount, ClientFeatureAccess, ClientUser, ClientUserModulePermission
from app.models.mix import Mix, MixIngredient
from app.models.product import Product, ProductIngredient
from app.models.raw_material import RawMaterial, RawMaterialPriceVersion
from app.models.throughput import ProductionThroughput, ThroughputProduct
from app.seed_access import seed_access
from app.services.client_access_service import MODULE_CATALOG, default_access_level_for_role
from app.services.throughput_service import import_workbook as import_throughput_workbook
from app.services.throughput_service import resolve_workbook_path as resolve_throughput_workbook_path


# Tenant that every row created or queried by the workbook seed helpers in this module is scoped to.
TENANT_ID = "hunter-premium-produce"
# Effective date stamped on raw-material price versions created/updated by the workbook import.
WORKBOOK_EFFECTIVE_DATE = date(2025, 9, 1)
# NOTE(review): not referenced in the visible part of this module — presumably an item id used
# elsewhere to detect that the workbook has already been imported; confirm against the caller.
WORKBOOK_SENTINEL_ITEM_ID = "404266"
# Preferred workbook file name; searched first in every candidate directory.
WORKBOOK_FILENAME = "1.xlsx"
# Older workbook file name; searched only after all WORKBOOK_FILENAME locations.
LEGACY_WORKBOOK_FILENAME = "Input Cost Spreadsheet(1).xlsx"
logger = logging.getLogger("data_entry_app.seed")
# Client names whose products are imported with visible=False (see _read_product_rows).
# Both spellings of "Uncategorized"/"Uncategorised" are listed.
HIDDEN_PRODUCT_CLIENTS = frozenset(
    {
        "Bird Grits",
        "Chaff",
        "Hay & Straw",
        "Hunter Premium Produce",
        "Straight Grain",
        "Uncategorized",
        "Uncategorised",
    }
)


def _workbook_candidates() -> list[Path]:
    """Return every path where the costing workbook may live, in priority order.

    The ``WORKBOOK_PATH`` environment variable (when set to a non-blank value)
    comes first, followed by the current file name in the known directories and
    finally the legacy file name in the same directories. Duplicate paths are
    dropped while keeping the first occurrence.
    """
    raw_env = os.getenv("WORKBOOK_PATH")
    override = raw_env.strip() if isinstance(raw_env, str) else ""

    repo_root = Path(__file__).resolve().parents[2]
    cwd = Path.cwd()
    shared_dir = Path("/srv/lean101-clients")
    app_dir = Path("/app")
    fs_root = Path("/")

    search_order: list[Path] = []
    if override:
        search_order.append(Path(override))
    search_order += [
        repo_root / "input_data" / WORKBOOK_FILENAME,
        cwd / "input_data" / WORKBOOK_FILENAME,
        shared_dir / WORKBOOK_FILENAME,
        repo_root / WORKBOOK_FILENAME,
        cwd / WORKBOOK_FILENAME,
        app_dir / WORKBOOK_FILENAME,
        fs_root / WORKBOOK_FILENAME,
        repo_root / LEGACY_WORKBOOK_FILENAME,
        cwd / LEGACY_WORKBOOK_FILENAME,
        shared_dir / LEGACY_WORKBOOK_FILENAME,
        app_dir / LEGACY_WORKBOOK_FILENAME,
        fs_root / LEGACY_WORKBOOK_FILENAME,
    ]

    # A dict keeps insertion order, so setdefault() retains the first
    # occurrence of each path string and silently ignores repeats.
    unique: dict[str, Path] = {}
    for path in search_order:
        unique.setdefault(str(path), path)
    return list(unique.values())


def _resolve_workbook_path() -> Path:
    """Return the first candidate path that exists on disk.

    When none exists, the highest-priority candidate is returned anyway so the
    caller can report a concrete (missing) path.
    """
    found = next((path for path in _workbook_candidates() if path.exists()), None)
    if found is not None:
        return found
    return _workbook_candidates()[0]


def _text(value) -> str | None:
    if value is None:
        return None
    if isinstance(value, str):
        normalized = value.strip()
        if not normalized:
            return None
        if normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
            return None
        return normalized
    return str(value).strip() or None


def _number(value) -> float | None:
    if value is None:
        return None
    if isinstance(value, bool):
        return float(value)
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        normalized = value.strip().replace(",", "")
        if not normalized or normalized.lower() in {"#n/a", "#value!", "n/a", "na", "none"}:
            return None
        try:
            return float(normalized)
        except ValueError:
            return None
    return None


def _format_quantity(value: float | int | None) -> str:
    if value is None:
        return "0"
    numeric = float(value)
    if abs(numeric - round(numeric)) < 1e-9:
        return str(int(round(numeric)))
    return f"{numeric:.4f}".rstrip("0").rstrip(".")


def _slug(value: str | None, *, fallback: str) -> str:
    """Turn ``value`` into a lowercase ``snake_case`` identifier.

    Runs of characters outside ``a-z0-9`` become a single underscore and
    leading/trailing underscores are removed. ``fallback`` is used both when
    ``value`` is empty and when nothing survives the cleanup.
    """
    source = _text(value) or fallback
    collapsed = re.sub(r"[^a-z0-9]+", "_", source.lower())
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else fallback


def _normalize_sale_type(value) -> str:
    """Map a worksheet sale-type cell to its storage key.

    Empty cells become ``"standard"``; ``"per unit"`` becomes ``"per_unit"``;
    any other label is lowercased with non-alphanumeric runs replaced by ``_``.
    """
    raw_label = _text(value)
    label = raw_label.lower() if raw_label else "standard"
    if label == "per unit":
        return "per_unit"
    return re.sub(r"[^a-z0-9]+", "_", label)


def _sheet_own_bag_to_model(value) -> bool:
    """Return ``True`` only when the cell reads ``"no bag"`` (case-insensitive)."""
    cell_text = _text(value)
    return cell_text is not None and cell_text.lower() == "no bag"


def _normalize_raw_material_unit(unit_label, kg_per_unit: float | None) -> str:
    """Canonicalize a raw-material unit label from the workbook.

    Tonne spellings collapse to ``"tonne"``, 20 kg bag spellings to
    ``"20kg bag"``, other kg-based labels to ``"<kg_per_unit>kg bag"``. A unit
    of exactly 1000 kg with an unrecognized label is treated as a tonne.
    Anything else keeps its original text, defaulting to ``"kg"`` when blank.
    """
    original = _text(unit_label)
    label = (original or "").lower()

    if label in {"per ton", "per tonne", "ton", "tonne"}:
        return "tonne"
    if label == "kg":
        return "kg"
    if label == "per bag 20kg" or "20 kg" in label:
        return "20kg bag"
    if "kg" in label and kg_per_unit:
        return f"{_format_quantity(kg_per_unit)}kg bag"
    if kg_per_unit == 1000:
        return "tonne"
    return original or "kg"


def _build_base_unit_label(sale_type: str, std_unit: float, own_bag: bool) -> str:
    """Build the base unit-of-measure label for a product.

    The formatted ``std_unit`` is followed by a suffix chosen from the sale
    type; for ``"standard"`` sales the suffix also depends on ``own_bag``.
    """
    quantity = _format_quantity(std_unit)
    if sale_type == "standard":
        suffix = "kg no bag" if own_bag else "kg bag"
    elif sale_type == "bulka":
        suffix = "kg bulka"
    elif sale_type == "per_unit":
        suffix = " unit"
    else:
        suffix = "kg"
    return f"{quantity}{suffix}"


def _derive_margin(finished_cost: float, sell_price) -> float | None:
    """Derive the margin fraction ``1 - cost / price`` from a sell-price cell.

    Returns ``None`` when the price is missing or non-positive, when the cost
    is non-positive, or when the price does not exceed the cost. The result is
    rounded to six decimals.
    """
    price = _number(sell_price)
    if price is None or price <= 0:
        return None
    if finished_cost <= 0 or price <= finished_cost:
        return None

    margin = 1 - (finished_cost / price)
    # Defensive bound check kept from the original implementation.
    if margin <= 0 or margin >= 1:
        return None
    return round(margin, 6)


def _build_process_key(label, grading_cost: float, bagging_cost: float, cracking_cost: float) -> str | None:
    """Build a process-rule key that encodes the label and its three costs.

    Returns ``None`` when all three costs are effectively zero. Otherwise the
    key is ``<slug>_g<grading>_b<bagging>_c<cracking>`` with each cost expressed
    in thousandths (cost * 1000, rounded to an integer).
    """
    costs = (grading_cost, bagging_cost, cracking_cost)
    if all(abs(cost) < 1e-9 for cost in costs):
        return None

    prefix = _slug(label, fallback="custom_process")
    grading_milli, bagging_milli, cracking_milli = (int(round(cost * 1000)) for cost in costs)
    return f"{prefix}_g{grading_milli}_b{bagging_milli}_c{cracking_milli}"


def _load_workbook(*required_sheets: str):
    """Open the first existing candidate workbook that has all required sheets.

    Args:
        *required_sheets: Sheet names that must all be present in the workbook.
            With no names given, the first existing candidate is returned.

    Returns:
        The workbook loaded with ``data_only=True``.

    Raises:
        FileNotFoundError: No candidate exists, or none contains every
            required sheet. The message lists the paths that were checked.
    """
    for path in _workbook_candidates():
        if path.exists():
            workbook = load_workbook(path, data_only=True)
            missing = [sheet_name for sheet_name in required_sheets if sheet_name not in workbook.sheetnames]
            if not missing:
                return workbook

    checked = ", ".join(str(path) for path in _workbook_candidates())
    if required_sheets:
        raise FileNotFoundError(
            "No workbook with required sheets found. "
            f"Required sheets: {', '.join(required_sheets)}. "
            f"Checked: {checked}"
        )

    workbook_path = _resolve_workbook_path()
    if not workbook_path.exists():
        raise FileNotFoundError(f"Workbook not found. Checked: {checked}")
    return load_workbook(workbook_path, data_only=True)


def _read_raw_material_rows(workbook) -> list[dict]:
    """Extract raw-material rows from the ``C- Raw Products Costs`` sheet.

    Rows are read from the third sheet row onward. A row is skipped when it has
    no name (column 0) or when both market value (column 1) and cost per kg
    (column 7) are missing. A missing or non-positive kg-per-unit (column 3)
    defaults to 1.0, and a missing market value is derived as
    ``cost_per_kg * kg_per_unit``.

    Returns:
        One dict per material with keys ``name``, ``unit_of_measure``,
        ``kg_per_unit``, ``market_value`` and ``waste_percentage``.
    """
    sheet = workbook["C- Raw Products Costs"]
    materials: list[dict] = []

    for cells in sheet.iter_rows(min_row=3, values_only=True):
        name = _text(cells[0])
        if not name:
            continue

        market_value = _number(cells[1])
        cost_per_kg = _number(cells[7])
        if market_value is None and cost_per_kg is None:
            continue

        kg_per_unit = _number(cells[3])
        if kg_per_unit is None or kg_per_unit <= 0:
            kg_per_unit = 1.0

        # At least one of the two prices is present here, so a missing market
        # value can always be derived from the per-kg cost.
        if market_value is None:
            market_value = round(cost_per_kg * kg_per_unit, 4)

        materials.append(
            {
                "name": name,
                "unit_of_measure": _normalize_raw_material_unit(cells[2], kg_per_unit),
                "kg_per_unit": kg_per_unit,
                "market_value": round(market_value, 4),
                "waste_percentage": _number(cells[4]) or 0.0,
            }
        )

    return materials


def _read_mix_rows(workbook) -> dict[tuple[str, str], dict]:
    """Read mix recipes from the ``M - All`` sheet, one best row per mix.

    Sheet layout as read here: column 0 is the client name, column 1 the mix
    name, column 2 the declared total kg, and columns 3+ hold per-ingredient
    quantities whose ingredient names come from the header row.

    When several rows share the same ``(client, mix)`` key, the row with the
    highest score wins; the score is ``(ingredient count, has declared total,
    total kg)`` compared as a tuple.

    Returns:
        Mapping of ``(client_name, mix_name)`` to a dict with keys
        ``client_name``, ``name``, ``ingredients``, ``total_kg`` and ``score``.
    """
    worksheet = workbook["M - All"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True))
    # Keep blank header cells as None placeholders: the names are paired with
    # the quantity columns by position, so dropping a blank header would shift
    # every later ingredient onto the wrong column.
    ingredient_names = [_text(value) for value in header_row[3:]]
    best_rows: dict[tuple[str, str], dict] = {}

    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        mix_name = _text(row[1])
        if not client_name or not mix_name:
            continue

        ingredients = []
        for ingredient_name, quantity in zip(ingredient_names, row[3:]):
            if not ingredient_name:
                # Column without a header: no ingredient to attribute it to.
                continue
            numeric_quantity = _number(quantity)
            if numeric_quantity and numeric_quantity > 0:
                ingredients.append({"raw_material_name": ingredient_name, "quantity_kg": numeric_quantity})

        if not ingredients:
            continue

        declared_total = _number(row[2])
        # Fall back to the summed ingredient quantities when no (non-zero)
        # total is declared on the row.
        total_kg = declared_total or round(sum(item["quantity_kg"] for item in ingredients), 4)
        score = (len(ingredients), 1 if declared_total is not None else 0, total_kg)
        key = (client_name, mix_name)
        current = best_rows.get(key)
        if current is None or score > current["score"]:
            best_rows[key] = {
                "client_name": client_name,
                "name": mix_name,
                "ingredients": ingredients,
                "total_kg": total_kg,
                "score": score,
            }

    return best_rows


def _read_product_ingredient_rows(workbook) -> dict[tuple[str, str], dict]:
    """Read per-product formulas from the ``mix_quantites_per_client_per_pr`` sheet.

    Sheet layout as read here: column 0 is the client name, column 1 the
    product name, column 2 the declared total kg, and columns 3+ hold
    per-ingredient quantities whose ingredient names come from the header row.
    Each ingredient's ``sort_order`` is its 1-based position among the
    ingredient columns. A later row with the same ``(client, product)`` key
    replaces an earlier one.

    Returns:
        Mapping of ``(client_name, product_name)`` to a dict with keys
        ``client_name``, ``product_name``, ``total_kg`` and ``ingredients``.
    """
    worksheet = workbook["mix_quantites_per_client_per_pr"]
    header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True))
    # Keep blank header cells as None placeholders: the names are paired with
    # the quantity columns by position, so dropping a blank header would shift
    # every later ingredient onto the wrong column.
    ingredient_names = [_text(value) for value in header_row[3:]]
    rows: dict[tuple[str, str], dict] = {}

    for row in worksheet.iter_rows(min_row=2, values_only=True):
        client_name = _text(row[0])
        product_name = _text(row[1])
        if not client_name or not product_name:
            continue

        ingredients = []
        for sort_order, (ingredient_name, quantity) in enumerate(zip(ingredient_names, row[3:]), start=1):
            if not ingredient_name:
                # Column without a header: no ingredient to attribute it to.
                continue
            numeric_quantity = _number(quantity)
            if numeric_quantity and numeric_quantity > 0:
                ingredients.append(
                    {
                        "raw_material_name": ingredient_name,
                        "quantity_kg": numeric_quantity,
                        "sort_order": sort_order,
                    }
                )

        if not ingredients:
            continue

        # Fall back to the summed ingredient quantities when no (non-zero)
        # total is declared on the row.
        total_kg = _number(row[2]) or round(sum(item["quantity_kg"] for item in ingredients), 4)
        rows[(client_name, product_name)] = {
            "client_name": client_name,
            "product_name": product_name,
            "total_kg": total_kg,
            "ingredients": ingredients,
        }

    return rows


def _read_product_rows(workbook) -> list[dict]:
    """Read product rows from the ``Product Cost - Price`` sheet.

    Works in two passes. The first pass parses every data row (sheet row 5
    onward) that has an item id, a name and a mix name, and counts how often
    each ``(bag_cost, freight_cost)`` pair occurs per base unit
    ``(sale_type, own_bag, std_unit)``. The second pass assigns each row its
    ``unit_of_measure`` label and ``bagging_process`` key; rows whose cost pair
    differs from the most common pair for their base unit get a disambiguating
    suffix on the unit label.

    Returns:
        The parsed row dicts, each extended in place with ``unit_of_measure``
        and ``bagging_process``.
    """
    worksheet = workbook["Product Cost - Price"]
    raw_rows: list[dict] = []
    # base unit key -> how many rows use each (bag_cost, freight_cost) pair
    unit_variants: dict[tuple[str, bool, float], Counter[tuple[float, float]]] = {}

    for row in worksheet.iter_rows(min_row=5, values_only=True):
        item_id = _text(row[1])
        name = _text(row[2])
        mix_name = _text(row[3])
        # Rows missing any of the three identifying cells are ignored.
        if not item_id or not name or not mix_name:
            continue

        sale_type = _normalize_sale_type(row[4])
        own_bag = _sheet_own_bag_to_model(row[5])
        std_unit = _number(row[6]) or 1.0
        bag_cost = round(_number(row[15]) or 0.0, 4)
        freight_cost = round(_number(row[16]) or 0.0, 4)
        base_unit_key = (sale_type, own_bag, std_unit)

        unit_variants.setdefault(base_unit_key, Counter())[(bag_cost, freight_cost)] += 1
        raw_rows.append(
            {
                # A blank client cell falls back to "General".
                "client_name": _text(row[0]) or "General",
                "item_id": item_id,
                "name": name,
                "mix_name": mix_name,
                "sale_type": sale_type,
                "own_bag": own_bag,
                "std_unit": std_unit,
                "items_per_pallet": int(round(_number(row[7]) or 1)),
                "grading_cost": round(_number(row[12]) or 0.0, 4),
                "bagging_cost": round(_number(row[13]) or 0.0, 4),
                "cracking_cost": round(_number(row[14]) or 0.0, 4),
                "bag_cost": bag_cost,
                "freight_cost": freight_cost,
                "finished_product_delivered": round(_number(row[17]) or 0.0, 4),
                # Margins are derived from the delivered cost (column 17) and
                # the sell-price cells in columns 19 and 20.
                "distributor_margin": _derive_margin(round(_number(row[17]) or 0.0, 4), row[19]),
                "wholesale_margin": _derive_margin(round(_number(row[17]) or 0.0, 4), row[20]),
                "process_label": _text(row[8]),
                # Raw text of the own-bag cell, kept for the "Yes" check below.
                "sheet_own_bag": _text(row[5]),
                "visible": (_text(row[0]) or "General") not in HIDDEN_PRODUCT_CLIENTS,
            }
        )

    products: list[dict] = []
    for row in raw_rows:
        base_unit_key = (row["sale_type"], row["own_bag"], row["std_unit"])
        unit_label = _build_base_unit_label(row["sale_type"], row["std_unit"], row["own_bag"])
        variant_counts = unit_variants[base_unit_key]
        # Only disambiguate when the same base unit appears with more than one
        # (bag_cost, freight_cost) pair.
        if len(variant_counts) > 1:
            current_variant = (row["bag_cost"], row["freight_cost"])
            primary_variant = variant_counts.most_common(1)[0][0]
            # The most common pair keeps the plain label; others get a suffix.
            if current_variant != primary_variant:
                if row["sheet_own_bag"] == "Yes":
                    unit_label = f"{unit_label} (Own Bag)"
                elif row["client_name"] == "Peckish":
                    unit_label = f"{unit_label} (Peckish)"
                elif row["client_name"] == "Uncategorized":
                    unit_label = f"{unit_label} (Bulk)"
                else:
                    unit_label = f"{unit_label} ({row['client_name']})"

        process_key = _build_process_key(
            row["process_label"],
            row["grading_cost"],
            row["bagging_cost"],
            row["cracking_cost"],
        )
        # The row dict from the first pass is extended in place and reused.
        row["unit_of_measure"] = unit_label
        row["bagging_process"] = process_key
        products.append(row)

    return products


def _upsert_raw_materials(db, rows: list[dict]) -> dict[str, RawMaterial]:
    """Create or refresh the tenant's raw materials and their active price.

    Materials are matched by name. New materials are flushed immediately so
    they receive an id. For rows with a positive market value the active price
    version is created or overwritten; otherwise an existing active price whose
    own market value is non-positive is switched to inactive.

    Returns:
        Mapping of material name to ``RawMaterial`` for the whole tenant,
        including materials that were not present in ``rows``.
    """
    seed_note = "Seeded from Input Cost Spreadsheet(1).xlsx"
    tenant_materials = db.scalars(select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)).all()
    materials_by_name = {material.name: material for material in tenant_materials}

    for row in rows:
        name = row["name"]
        material = materials_by_name.get(name)
        if material is not None:
            material.unit_of_measure = row["unit_of_measure"]
            material.kg_per_unit = row["kg_per_unit"]
            material.status = "active"
            material.notes = seed_note
        else:
            material = RawMaterial(
                tenant_id=TENANT_ID,
                name=name,
                supplier="Workbook Import",
                unit_of_measure=row["unit_of_measure"],
                kg_per_unit=row["kg_per_unit"],
                status="active",
                notes=seed_note,
            )
            db.add(material)
            db.flush()
            materials_by_name[name] = material

        active_price = None
        for price in material.price_versions:
            if price.status == "active":
                active_price = price
                break

        market_value = row["market_value"]
        has_usable_price = market_value is not None and market_value > 0
        if not has_usable_price:
            if active_price is not None and active_price.market_value <= 0:
                active_price.status = "inactive"
                active_price.notes = "Disabled during workbook import because market value was non-positive"
            continue

        if active_price is None:
            material.price_versions.append(
                RawMaterialPriceVersion(
                    tenant_id=TENANT_ID,
                    market_value=market_value,
                    waste_percentage=row["waste_percentage"],
                    effective_date=WORKBOOK_EFFECTIVE_DATE,
                    status="active",
                    notes=seed_note,
                )
            )
        else:
            active_price.market_value = market_value
            active_price.waste_percentage = row["waste_percentage"]
            active_price.effective_date = WORKBOOK_EFFECTIVE_DATE
            active_price.status = "active"
            active_price.notes = seed_note

    db.flush()
    return materials_by_name


def _upsert_process_rules(db, products: list[dict]) -> None:
    """Create or update one ``ProcessCostRule`` per distinct bagging process.

    Products without a ``bagging_process`` key are ignored. When several
    products share a process name, the costs of the last one processed win.
    """
    tenant_rules = db.scalars(select(ProcessCostRule).where(ProcessCostRule.tenant_id == TENANT_ID)).all()
    rules_by_name = {rule.process_name: rule for rule in tenant_rules}

    for product in products:
        process_name = product["bagging_process"]
        if not process_name:
            continue

        costs = {
            "grading_cost": product["grading_cost"],
            "bagging_cost": product["bagging_cost"],
            "cracking_cost": product["cracking_cost"],
        }
        rule = rules_by_name.get(process_name)
        if rule is not None:
            for attribute, amount in costs.items():
                setattr(rule, attribute, amount)
            continue

        rule = ProcessCostRule(tenant_id=TENANT_ID, process_name=process_name, **costs)
        db.add(rule)
        rules_by_name[process_name] = rule


def _upsert_packaging_and_freight_rules(db, products: list[dict]) -> None:
    """Create or update packaging and freight cost rules from product rows.

    Packaging rules are keyed by ``(sale_type, unit_of_measure, own_bag)`` and
    freight rules by ``(sale_type, unit_of_measure)``. When several products
    map to the same key, the cost of the last one processed wins.
    """
    tenant_packaging = db.scalars(select(PackagingCostRule).where(PackagingCostRule.tenant_id == TENANT_ID)).all()
    packaging_rules = {(rule.sale_type, rule.unit_of_measure, rule.own_bag): rule for rule in tenant_packaging}
    tenant_freight = db.scalars(select(FreightCostRule).where(FreightCostRule.tenant_id == TENANT_ID)).all()
    freight_rules = {(rule.sale_type, rule.unit_of_measure): rule for rule in tenant_freight}

    for product in products:
        sale_type = product["sale_type"]
        unit_of_measure = product["unit_of_measure"]
        own_bag = product["own_bag"]

        packaging_key = (sale_type, unit_of_measure, own_bag)
        packaging_rule = packaging_rules.get(packaging_key)
        if packaging_rule is not None:
            packaging_rule.bag_cost = product["bag_cost"]
        else:
            packaging_rule = PackagingCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit_of_measure,
                own_bag=own_bag,
                bag_cost=product["bag_cost"],
            )
            db.add(packaging_rule)
            packaging_rules[packaging_key] = packaging_rule

        freight_key = (sale_type, unit_of_measure)
        freight_rule = freight_rules.get(freight_key)
        if freight_rule is not None:
            freight_rule.cost_per_unit = product["freight_cost"]
        else:
            freight_rule = FreightCostRule(
                tenant_id=TENANT_ID,
                sale_type=sale_type,
                unit_of_measure=unit_of_measure,
                cost_per_unit=product["freight_cost"],
            )
            db.add(freight_rule)
            freight_rules[freight_key] = freight_rule


def _upsert_mix(
    db,
    *,
    client_name: str,
    mix_name: str,
    ingredients: list[dict],
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Create or load a mix and make its ingredient rows match ``ingredients``.

    The mix is looked up in ``mix_cache`` first, then in the database, and is
    created (and flushed, so it has an id) when neither has it; the cache is
    updated in place. Ingredients naming an unknown raw material are skipped,
    existing ingredient rows get their quantity updated, and ingredient rows no
    longer listed are deleted.

    Returns:
        The persistent ``Mix`` for ``(client_name, mix_name)``.
    """
    cache_key = (client_name, mix_name)
    mix = mix_cache.get(cache_key)
    if mix is None:
        lookup = select(Mix).where(
            Mix.tenant_id == TENANT_ID,
            Mix.client_name == client_name,
            Mix.name == mix_name,
        )
        mix = db.scalar(lookup)
        if mix is None:
            mix = Mix(
                tenant_id=TENANT_ID,
                client_name=client_name,
                name=mix_name,
                status="active",
                version=1,
                notes="Seeded from Input Cost Spreadsheet(1).xlsx",
            )
            db.add(mix)
            db.flush()
        mix_cache[cache_key] = mix

    stored_rows = db.scalars(select(MixIngredient).where(MixIngredient.mix_id == mix.id)).all()
    current = {stored.raw_material_id: stored for stored in stored_rows}
    kept_ids = set()

    for spec in ingredients:
        material = raw_material_map.get(spec["raw_material_name"])
        if material is None:
            continue

        kept_ids.add(material.id)
        stored = current.get(material.id)
        if stored is not None:
            stored.quantity_kg = spec["quantity_kg"]
            continue
        db.add(
            MixIngredient(
                tenant_id=TENANT_ID,
                mix_id=mix.id,
                raw_material_id=material.id,
                quantity_kg=spec["quantity_kg"],
            )
        )

    # Anything stored but no longer part of the recipe is removed.
    stale_rows = [stored for material_id, stored in current.items() if material_id not in kept_ids]
    for stored in stale_rows:
        db.delete(stored)

    return mix


def _ensure_single_material_mix(
    db,
    *,
    client_name: str,
    raw_material_name: str,
    raw_material_map: dict[str, RawMaterial],
    mix_cache: dict[tuple[str, str], Mix],
) -> Mix:
    """Upsert a mix that consists of exactly one raw material.

    The mix takes the raw material's name, and its only ingredient quantity is
    the material's ``kg_per_unit`` (1.0 when that is unset or zero).

    Raises:
        KeyError: ``raw_material_name`` is not in ``raw_material_map``.
    """
    material = raw_material_map[raw_material_name]
    sole_ingredient = {
        "raw_material_name": raw_material_name,
        "quantity_kg": material.kg_per_unit or 1.0,
    }
    return _upsert_mix(
        db,
        client_name=client_name,
        mix_name=raw_material_name,
        ingredients=[sole_ingredient],
        raw_material_map=raw_material_map,
        mix_cache=mix_cache,
    )


def _upsert_products(db, products: list[dict], mix_lookup: dict[tuple[str, str], Mix], raw_material_map: dict[str, RawMaterial]) -> None:
    """Create or update tenant products from parsed workbook rows.

    Each row's mix is resolved in this order: an exact ``(client, mix name)``
    match; a mix name that is unique across all clients in ``mix_lookup``; a
    single-material mix created on the fly when the mix name is itself a raw
    material. Rows with no resolvable mix are skipped. Existing products are
    matched by ``item_id``.
    """
    mix_cache = dict(mix_lookup)
    mixes_by_name: dict[str, list[Mix]] = {}
    for known_mix in mix_cache.values():
        mixes_by_name.setdefault(known_mix.name, []).append(known_mix)

    tenant_products = db.scalars(select(Product).where(Product.tenant_id == TENANT_ID)).all()
    existing_products = {stored.item_id: stored for stored in tenant_products if stored.item_id}

    for row in products:
        client_name = row["client_name"]
        mix_name = row["mix_name"]

        mix = mix_cache.get((client_name, mix_name))
        if mix is None:
            same_name = mixes_by_name.get(mix_name, [])
            if len(same_name) == 1:
                mix = same_name[0]
        if mix is None and mix_name in raw_material_map:
            mix = _ensure_single_material_mix(
                db,
                client_name=client_name,
                raw_material_name=mix_name,
                raw_material_map=raw_material_map,
                mix_cache=mix_cache,
            )
        if mix is None:
            continue

        # Column values shared by the insert and update paths.
        fields = {
            "client_name": client_name,
            "name": row["name"],
            "mix_id": mix.id,
            "sale_type": row["sale_type"],
            "own_bag": row["own_bag"],
            "visible": row["visible"],
            "unit_of_measure": row["unit_of_measure"],
            "items_per_pallet": row["items_per_pallet"],
            "bagging_process": row["bagging_process"],
            "distributor_margin": row["distributor_margin"],
            "wholesale_margin": row["wholesale_margin"],
            "notes": "Seeded from Input Cost Spreadsheet(1).xlsx",
        }

        item_id = row["item_id"]
        product = existing_products.get(item_id)
        if product is not None:
            for attribute, value in fields.items():
                setattr(product, attribute, value)
            continue

        product = Product(tenant_id=TENANT_ID, item_id=item_id, **fields)
        db.add(product)
        existing_products[item_id] = product


def _upsert_product_ingredients(
    db,
    *,
    product_rows: list[dict],
    product_ingredient_rows: dict[tuple[str, str], dict],
    raw_material_map: dict[str, RawMaterial],
) -> None:
    """Sync ``ProductIngredient`` rows with the per-product formulas.

    A formula keyed by ``(client, name)`` applies to every tenant product of
    that client whose own name or whose mix name equals ``name``. For each
    matched product, listed ingredients are inserted or updated, ingredients
    naming an unknown raw material are skipped, and stored ingredients not in
    the formula are deleted. ``product_rows`` is accepted but not used here.
    """
    tenant_products = db.scalars(
        select(Product).where(Product.tenant_id == TENANT_ID).options(selectinload(Product.mix))
    ).all()

    products_by_formula_key: dict[tuple[str, str], list[Product]] = {}
    for product in tenant_products:
        name_key = (product.client_name, product.name)
        products_by_formula_key.setdefault(name_key, []).append(product)
        if product.mix is not None:
            mix_key = (product.client_name, product.mix.name)
            # Register under the mix name too, but only once per product.
            if mix_key != name_key:
                products_by_formula_key.setdefault(mix_key, []).append(product)

    for formula_key, formula in product_ingredient_rows.items():
        for product in products_by_formula_key.get(formula_key, ()):
            stored_rows = db.scalars(
                select(ProductIngredient).where(ProductIngredient.product_id == product.id)
            ).all()
            current = {stored.raw_material_id: stored for stored in stored_rows}
            kept_ids: set[int] = set()

            for spec in formula["ingredients"]:
                material = raw_material_map.get(spec["raw_material_name"])
                if material is None:
                    continue

                kept_ids.add(material.id)
                stored = current.get(material.id)
                if stored is not None:
                    stored.quantity_kg = spec["quantity_kg"]
                    stored.sort_order = spec["sort_order"]
                    continue
                db.add(
                    ProductIngredient(
                        tenant_id=TENANT_ID,
                        product_id=product.id,
                        raw_material_id=material.id,
                        quantity_kg=spec["quantity_kg"],
                        sort_order=spec["sort_order"],
                    )
                )

            stale_rows = [stored for material_id, stored in current.items() if material_id not in kept_ids]
            for stored in stale_rows:
                db.delete(stored)


def seed_product_ingredients_from_workbook(db) -> dict[str, int]:
    """Backfill row-specific product formulas for databases seeded before this table existed.

    Returns:
        Counts keyed by ``formulas`` (formulas read from the workbook),
        ``products_with_formulas`` (distinct products that have ingredient rows
        after the sync) and ``backfilled`` (the same count, but only when the
        tenant had no ingredient rows beforehand; otherwise 0). All zeros when
        the workbook is missing or holds no formulas.
    """
    try:
        formula_workbook = _load_workbook("mix_quantites_per_client_per_pr")
    except FileNotFoundError:
        logger.info("Skipping product ingredient backfill because formula workbook is missing")
        return {"formulas": 0, "products_with_formulas": 0, "backfilled": 0}

    formulas = _read_product_ingredient_rows(formula_workbook)
    formula_count = len(formulas)
    if formula_count == 0:
        return {"formulas": 0, "products_with_formulas": 0, "backfilled": 0}

    tenant_materials = db.scalars(select(RawMaterial).where(RawMaterial.tenant_id == TENANT_ID)).all()
    raw_material_map = {material.name: material for material in tenant_materials}
    if not raw_material_map:
        return {"formulas": formula_count, "products_with_formulas": 0, "backfilled": 0}

    # Probe before syncing so we can tell a first-time backfill from a refresh.
    first_existing_id = db.scalar(
        select(ProductIngredient.id).where(ProductIngredient.tenant_id == TENANT_ID).limit(1)
    )
    had_product_ingredients = first_existing_id is not None

    _upsert_product_ingredients(
        db,
        product_rows=[],
        product_ingredient_rows=formulas,
        raw_material_map=raw_material_map,
    )
    db.flush()

    distinct_products = db.scalar(
        select(func.count(func.distinct(ProductIngredient.product_id))).where(ProductIngredient.tenant_id == TENANT_ID)
    )
    covered = int(distinct_products or 0)
    return {
        "formulas": formula_count,
        "products_with_formulas": covered,
        "backfilled": 0 if had_product_ingredients else covered,
    }


def _infer_throughput_bag_size(product: Product) -> float | None:
    if product.sale_type == "bulka":
        return None
    unit = (product.unit_of_measure or "").strip().lower()
    match = re.search(r"(\d+(?:\.\d+)?)\s*kg", unit)
    if match:
        return float(match.group(1))
    if unit == "kg":
        return 1.0
    if unit == "tonne":
        return 1000.0
    return None


def _infer_throughput_bulka_default(product: Product) -> bool:
    unit = (product.unit_of_measure or "").lower()
    return product.sale_type == "bulka" or "bulka" in product.name.lower() or "bulka" in unit


def seed_throughput_products_from_costing(db) -> dict[str, int]:
    """Mirror costing products into the throughput product dropdown.

    Every tenant costing ``Product`` is matched to a ``ThroughputProduct`` by
    item id first and by case-insensitive name second. Unmatched products are
    created; matched ones are updated in place and forced to active stock
    items. The session is flushed once at the end.

    Returns:
        Counts keyed by ``created``, ``updated`` and ``skipped``. A costing
        product is skipped when it has no name, when its item id was already
        processed in this run, or when it has no item id and its name was
        already processed in this run.
    """
    costing_products = db.scalars(
        select(Product)
        .where(Product.tenant_id == TENANT_ID)
        .order_by(Product.name, Product.id)
    ).all()
    if not costing_products:
        return {"created": 0, "updated": 0, "skipped": 0}

    throughput_products = db.scalars(
        select(ThroughputProduct).where(ThroughputProduct.tenant_id == TENANT_ID)
    ).all()
    # Lookup tables for existing throughput products; both are extended as new
    # rows are created below so later costing products can match them.
    by_item = {
        throughput_product.item_id: throughput_product
        for throughput_product in throughput_products
        if throughput_product.item_id
    }
    by_name = {
        throughput_product.name.strip().lower(): throughput_product
        for throughput_product in throughput_products
        if throughput_product.name
    }

    created = 0
    updated = 0
    skipped = 0
    # Item ids / names already handled in this run, used to skip duplicates.
    seen_item_ids: set[str] = set()
    seen_names: set[str] = set()
    for costing_product in costing_products:
        name = (costing_product.name or "").strip()
        if not name:
            skipped += 1
            continue

        item_id = (costing_product.item_id or "").strip() or None
        name_key = name.lower()
        if item_id and item_id in seen_item_ids:
            skipped += 1
            continue
        # Name-based de-duplication applies only to products without an item id.
        if not item_id and name_key in seen_names:
            skipped += 1
            continue
        if item_id:
            seen_item_ids.add(item_id)
        seen_names.add(name_key)

        default_bag_size = _infer_throughput_bag_size(costing_product)
        is_bulka_default = _infer_throughput_bulka_default(costing_product)
        client_name = (costing_product.client_name or "").strip() or None
        # NOTE(review): when the item id is unknown to by_item this falls back
        # to a name match, and the matched row's item_id is overwritten further
        # down — confirm that is intended when two SKUs share a name.
        product = (by_item.get(item_id) if item_id else None) or by_name.get(name_key)
        if product is None:
            product = ThroughputProduct(
                tenant_id=TENANT_ID,
                item_id=item_id,
                name=name,
                client_name=client_name,
                default_bag_size=default_bag_size,
                is_bulka_default=is_bulka_default,
                # Every costing SKU should be selectable in the throughput picker
                # (the Client filter + search keep the long list manageable).
                active=True,
                is_stock_item=True,
                notes="Seeded from costing products",
            )
            db.add(product)
            created += 1
            if item_id:
                by_item[item_id] = product
            by_name[name_key] = product
            continue

        # Existing row: apply only the fields that differ and count the row as
        # updated when at least one of them changed.
        changed = False
        if item_id and product.item_id != item_id:
            product.item_id = item_id
            changed = True
        if product.name != name:
            old_name_key = product.name.strip().lower() if product.name else None
            product.name = name
            # Re-key the name lookup so it follows the renamed row.
            if old_name_key:
                by_name.pop(old_name_key, None)
            by_name[name_key] = product
            changed = True
        if product.default_bag_size != default_bag_size:
            product.default_bag_size = default_bag_size
            changed = True
        if product.is_bulka_default != is_bulka_default:
            product.is_bulka_default = is_bulka_default
            changed = True
        if product.client_name != client_name:
            product.client_name = client_name
            changed = True
        if product.active is not True:
            product.active = True
            changed = True
        if product.is_stock_item is not True:
            product.is_stock_item = True
            changed = True
        # Notes are only (re)stamped when empty or already the seed note, so
        # any other note text is left alone; this does not count as a change.
        if product.notes in {None, "", "Seeded from costing products"}:
            product.notes = "Seeded from costing products"
        if changed:
            updated += 1

    db.flush()
    return {"created": created, "updated": updated, "skipped": skipped}


def seed_client_access(db):
    """Seed the initial client accounts with users, feature flags and permissions.

    The function is a no-op when any ``ClientAccount`` row already exists.
    Otherwise it creates two accounts (the primary ``TENANT_ID`` account and
    the "loft-grains" onboarding account), attaches their users, one
    ``ClientFeatureAccess`` row per ``MODULE_CATALOG`` entry, one
    ``ClientUserModulePermission`` row per user and module, and a single
    audit event on the primary account.

    The session is flushed once but never committed here; committing is left
    to the caller.
    """
    if db.scalar(select(ClientAccount.id)) is not None:
        return

    primary_account = ClientAccount(
        tenant_id=TENANT_ID,
        name="Hunter Premium Produce",
        client_code="HPP",
        status="active",
        powerbi_workspace="hunter-premium-produce-prod",
        notes="Primary production client for the Lean 101 admin and access workflows",
    )
    onboarding_account = ClientAccount(
        tenant_id="loft-grains",
        name="Loft Grains",
        client_code="LOFT",
        status="onboarding",
        powerbi_workspace="farm-ops-sandbox",
        notes="Onboarding workspace used to test staged user enablement",
    )

    db.add_all([primary_account, onboarding_account])
    # Flush before the account ids are read further down
    # (``client_account_id`` on permissions, ``target_id`` on the audit event).
    db.flush()

    primary_users = [
        ClientUser(
            tenant_id=primary_account.tenant_id,
            full_name="Amelia Hart",
            email="operator@example.com",
            role="superadmin",
            status="active",
            is_new_user=False,
            last_login_at=datetime(2026, 4, 24, 11, 30),
        ),
        ClientUser(
            tenant_id=primary_account.tenant_id,
            full_name="Ethan Cole",
            email="ethan.cole@hunterpremiumproduce.example",
            role="operator",
            status="invited",
            is_new_user=True,
        ),
    ]
    primary_account.users.extend(primary_users)

    onboarding_users = [
        ClientUser(
            tenant_id=onboarding_account.tenant_id,
            full_name="Ruby Singh",
            email="ruby.singh@loftgrains.example",
            role="viewer",
            status="active",
            is_new_user=False,
            last_login_at=datetime(2026, 4, 22, 9, 10),
        )
    ]
    onboarding_account.users.extend(onboarding_users)

    # Feature keys switched on per tenant; every other catalog entry is still
    # created, just with ``enabled=False``.
    enabled_by_tenant = {
        TENANT_ID: {"dashboard", "raw_materials", "mix_master", "mix_calculator", "products", "scenarios", "powerbi_export", "client_access", "operations_throughput"},
        "loft-grains": {"dashboard", "mix_calculator", "products", "powerbi_export"},
    }

    for account in (primary_account, onboarding_account):
        switched_on = enabled_by_tenant[account.tenant_id]
        account.features.extend(
            [
                ClientFeatureAccess(
                    tenant_id=account.tenant_id,
                    feature_key=key,
                    feature_name=label,
                    feature_group=group,
                    description=blurb,
                    enabled=key in switched_on,
                )
                for key, label, group, blurb in MODULE_CATALOG
            ]
        )

        for member in account.users:
            member.module_permissions.extend(
                [
                    ClientUserModulePermission(
                        tenant_id=account.tenant_id,
                        client_account_id=account.id,
                        module_key=catalog_entry[0],
                        access_level=default_access_level_for_role(member.role, catalog_entry[0]),
                    )
                    for catalog_entry in MODULE_CATALOG
                ]
            )

    seeded_event = ClientAccessAuditEvent(
        tenant_id=primary_account.tenant_id,
        actor_type="seed",
        actor_name="Lean 101 Seeder",
        actor_email="system@lean101.local",
        actor_role="system",
        action="client_access.seeded",
        target_type="client_account",
        target_id=primary_account.id,
        module_key="client_access",
        summary="Initial client access controls, module permissions, and feature flags were seeded.",
    )
    primary_account.audit_events.append(seeded_event)


def seed_costing_workspace(db):
    """Read the costing and formula workbooks and upsert their contents.

    Rows are read first (raw materials, mixes, products, product
    ingredients), then written in this order: raw materials, process rules,
    packaging/freight rules, mixes, products, product ingredients.

    NOTE(review): the string arguments to ``_load_workbook`` are presumably
    the sheet names the workbook must contain - confirm against that helper.
    Nothing is committed here.
    """
    costing_book = _load_workbook("C- Raw Products Costs", "M - All", "Product Cost - Price")
    formula_book = _load_workbook("mix_quantites_per_client_per_pr")

    # Read phase: pull every row set out of the workbooks before writing.
    raw_rows = _read_raw_material_rows(costing_book)
    mixes_by_key = _read_mix_rows(costing_book)
    costing_products = _read_product_rows(costing_book)
    ingredient_rows = _read_product_ingredient_rows(formula_book)

    # Write phase.
    raw_material_map = _upsert_raw_materials(db, raw_rows)
    _upsert_process_rules(db, costing_products)
    _upsert_packaging_and_freight_rules(db, costing_products)

    # The cache is handed to each ``_upsert_mix`` call and extended right
    # after it, so later mixes are upserted with earlier ones already cached.
    mix_cache: dict[tuple[str, str], Mix] = {}
    for row in mixes_by_key.values():
        upserted = _upsert_mix(
            db,
            client_name=row["client_name"],
            mix_name=row["name"],
            ingredients=row["ingredients"],
            raw_material_map=raw_material_map,
            mix_cache=mix_cache,
        )
        cache_key = (row["client_name"], row["name"])
        mix_cache[cache_key] = upserted

    _upsert_products(db, costing_products, mix_cache, raw_material_map)
    _upsert_product_ingredients(
        db,
        product_rows=costing_products,
        product_ingredient_rows=ingredient_rows,
        raw_material_map=raw_material_map,
    )


def seed_throughput_workbook(db):
    """Import the Operations Throughput workbook on first run if tables are empty.

    The import is attempted only when neither ``ThroughputProduct`` nor
    ``ProductionThroughput`` has any row. A missing workbook or a failed
    import is logged and does not stop the function. In every case the
    throughput products are then synced from the costing products.
    """
    products_present = db.scalar(select(ThroughputProduct.id)) is not None
    entries_present = db.scalar(select(ProductionThroughput.id)) is not None

    if not (products_present or entries_present):
        workbook_path = resolve_throughput_workbook_path()
        if workbook_path is not None:
            try:
                import_report = import_throughput_workbook(db, workbook_path, TENANT_ID)
            except Exception:
                # Best effort: record the traceback and fall through to the
                # costing-based sync below.
                logger.exception("Failed to seed Operations Throughput workbook from %s", workbook_path)
            else:
                logger.info("Operations Throughput seeded from %s: %s", workbook_path, import_report)
        else:
            logger.info("Operations Throughput workbook not found; seeding throughput products from costing products")

    # Runs regardless of the import outcome above.
    sync_report = seed_throughput_products_from_costing(db)
    if any(sync_report.values()):
        logger.info("Throughput products synced from costing products: %s", sync_report)


def seed_throughput_products(db):
    """Sync throughput products from costing products without importing historical entries.

    The sync report is logged only when at least one of its values is
    truthy. Always returns ``None``.
    """
    sync_report = seed_throughput_products_from_costing(db)
    if not any(sync_report.values()):
        return
    logger.info("Throughput products synced from costing products: %s", sync_report)


def seed_startup_basics():
    """Create the tables and run the startup seeders in a single session.

    Order: client access, access, throughput workbook, then the product
    ingredient backfill from the workbook. Everything is committed together
    at the end.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        for seeder in (seed_client_access, seed_access, seed_throughput_workbook):
            seeder(session)

        backfill_report = seed_product_ingredients_from_workbook(session)
        # Only log when the backfill reports a truthy "backfilled" value.
        if backfill_report["backfilled"]:
            logger.info("Product ingredients backfilled from workbook: %s", backfill_report)

        session.commit()


def seed_all():
    """Create the tables and run the full seed in a single session.

    The costing workspace is seeded when the resolved workbook path exists;
    otherwise a warning listing the candidate paths is logged. Throughput
    products, client access and access are then seeded and the session is
    committed.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        if _resolve_workbook_path().exists():
            seed_costing_workspace(session)
        else:
            checked_paths = ", ".join(map(str, _workbook_candidates()))
            logger.warning(
                "Skipping costing workspace seed because workbook is missing. Checked: %s",
                checked_paths,
            )

        for seeder in (seed_throughput_products, seed_client_access, seed_access):
            seeder(session)

        session.commit()


def seed_if_empty():
    """Create the tables and seed the costing workspace only when it is empty.

    "Empty" means no ``RawMaterial`` row exists. In that case the costing
    workspace is seeded if the resolved workbook path exists, otherwise a
    warning listing the candidate paths is logged. Throughput products,
    client access and access are seeded regardless, then the session is
    committed.
    """
    Base.metadata.create_all(bind=engine)
    with SessionLocal() as session:
        costing_is_empty = session.scalar(select(RawMaterial.id)) is None
        if costing_is_empty:
            if _resolve_workbook_path().exists():
                seed_costing_workspace(session)
            else:
                checked_paths = ", ".join(map(str, _workbook_candidates()))
                logger.warning(
                    "Skipping costing workspace seed because workbook is missing. Checked: %s",
                    checked_paths,
                )

        for seeder in (seed_throughput_products, seed_client_access, seed_access):
            seeder(session)

        session.commit()


# Running this module directly as a script performs the full seed.
if __name__ == "__main__":
    seed_all()