"""
|
|||
|
|
analysis_engine.py — Core analytical layer for the SHEQ reporting tool.
|
|||
|
|
|
|||
|
|
Accepts normalised DataFrames from data_loader and produces a structured
|
|||
|
|
results dict consumed by report_builder. All analysis is performed here;
|
|||
|
|
report_builder only formats and writes.
|
|||
|
|
|
|||
|
|
Public API
|
|||
|
|
----------
|
|||
|
|
run_full_analysis(events, safety_energy, llc, start_date, split_date,
|
|||
|
|
pd1_name, pd2_name, output_dir) -> AnalysisResults
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import logging
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
import warnings
|
|||
|
|
from collections import Counter
|
|||
|
|
from dataclasses import dataclass, field
|
|||
|
|
from typing import Any, Optional
|
|||
|
|
|
|||
|
|
import matplotlib
|
|||
|
|
matplotlib.use("Agg")
|
|||
|
|
import matplotlib.pyplot as plt
|
|||
|
|
import matplotlib.ticker as mticker
|
|||
|
|
import numpy as np
|
|||
|
|
import pandas as pd
|
|||
|
|
|
|||
|
|
from config import (
|
|||
|
|
CHART_PALETTE,
|
|||
|
|
DEEP_BLUE, SKY_BLUE, DARK_GREEN, MID_GREEN, LIGHT_GREEN,
|
|||
|
|
PURPLE, AMBER, RED, MUTED,
|
|||
|
|
CONSEQUENCE_ORDER, CONSEQUENCE_SERIOUS,
|
|||
|
|
LEADING_ACTIVITY_TYPES, ACTIVITY_COLOURS,
|
|||
|
|
AT_RISK_KEYWORDS,
|
|||
|
|
CORR_MIN_MONTHS, LEADER_MIN_ACTIVITIES,
|
|||
|
|
TWO_YEAR_WINDOW_MONTHS, QUALITY_SCORE_BANDS,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
log = logging.getLogger(__name__)
|
|||
|
|
warnings.filterwarnings("ignore", category=UserWarning)
|
|||
|
|
|
|||
|
|
|
|||
|
|
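
# Illustrative call of the public entry point described in the module
# docstring (argument values here are placeholders, not real data):
#
#   results = run_full_analysis(
#       events, safety_energy, llc,
#       start_date="2023-01-01", split_date="2024-01-01",
#       pd1_name="Period 1", pd2_name="Period 2",
#       output_dir="output/",
#   )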


# ─────────────────────────────────────────────────────────────────────────────
# Result container
# ─────────────────────────────────────────────────────────────────────────────


@dataclass
class AnalysisResults:
    """All analysis outputs passed to report_builder."""
    params: dict[str, Any] = field(default_factory=dict)
    data_quality: dict[str, Any] = field(default_factory=dict)
    events_summary: dict[str, Any] = field(default_factory=dict)
    leading_summary: dict[str, Any] = field(default_factory=dict)
    trends: dict[str, Any] = field(default_factory=dict)
    effectiveness: dict[str, Any] = field(default_factory=dict)
    at_risk: dict[str, Any] = field(default_factory=dict)
    se_events_rel: dict[str, Any] = field(default_factory=dict)
    focus_areas: dict[str, Any] = field(default_factory=dict)
    charts: dict[str, str] = field(default_factory=dict)  # name → file path
    recommendations: list[str] = field(default_factory=list)
    caveats: list[str] = field(default_factory=list)


# ─────────────────────────────────────────────────────────────────────────────
# Chart helpers
# ─────────────────────────────────────────────────────────────────────────────


def _setup_style() -> None:
    """Apply brand-aligned matplotlib defaults."""
    import matplotlib.font_manager as fm
    available = {f.name for f in fm.fontManager.ttflist}
    if "Source Sans Pro" in available:
        plt.rcParams["font.family"] = "Source Sans Pro"
    elif "Source Sans 3" in available:
        plt.rcParams["font.family"] = "Source Sans 3"
    else:
        plt.rcParams["font.family"] = "sans-serif"
    plt.rcParams.update({
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.3,
        "grid.linestyle": "--",
    })


def _save(fig: plt.Figure, path: str) -> None:
    fig.tight_layout()
    fig.savefig(path, dpi=180, bbox_inches="tight", facecolor="white")
    plt.close(fig)


def _month_labels(periods: pd.PeriodIndex) -> list[str]:
    return [p.strftime("%b %y") for p in periods]


QUALITY_TEXT_COLUMNS = [
    "module_name",
    "llc_topic",
    "at_risk_obs",
    "positive_obs",
    "at_risk_crp",
    "Immediate Actions Taken / Comments",
    "Instruction",
    "Top practices",
    "Top improvement opportunities",
    "Review & Action",
    "Best practices shared with site leaders",
    "Activity/Task",
    "Custom",
]

INPUT_DEPTH_BASE_FIELDS = [
    "module_name",
    "module_prefix",
    "leader",
    "business_unit",
    "project",
    "location",
    "shift",
]

INPUT_DEPTH_OPTIONAL_FIELDS = [
    "participants",
    "time_spent",
    "at_risk_crp",
    "llc_topic",
    "at_risk_obs",
    "positive_obs",
    "find_fix",
    "Immediate Actions Taken / Comments",
    "Instruction",
    "Top practices",
    "Top improvement opportunities",
    "Review & Action",
    "Best practices shared with site leaders",
    "Activity/Task",
    "Custom",
]

INPUT_DEPTH_NUMERIC_FIELDS = [
    "at_risk_aspects",
    "total_questions",
    "actions",
    "atl_actions",
]

SEMANTIC_EMPTY_STRINGS = {
    "", "n/a", "na", "nan", "nil", "none", "null", "unknown", "not applicable",
    "no", "no risk identified", "no at risk identified", "no at risk situations identified",
}

ACTION_WORDS = {
    "action", "address", "brief", "coach", "control", "correct", "escalate",
    "fix", "follow", "improve", "implement", "isolate", "monitor", "plan",
    "rectify", "reinforce", "repair", "replace", "review", "stop", "train",
    "update", "verify",
}

LEARNING_WORDS = {
    "awareness", "coach", "coaching", "discuss", "discussed", "education",
    "explained", "feedback", "learn", "learning", "lesson", "mentor",
    "reinforce", "reinforced", "reminded", "shared", "understand",
}

REACTIVE_WORDS = {
    "breach", "defect", "failure", "incident", "issue", "non-compliance",
    "not in place", "overdue", "unsafe", "failed",
}

PREVENTIVE_WORDS = {
    "before", "brief", "coaching", "planned", "pre-start", "prepare",
    "proactive", "reinforce", "review", "verify",
}

GENERIC_PATTERNS = [
    "all good",
    "n/a",
    "na",
    "nil",
    "none",
    "no issues",
    "no at risk situations identified",
    "no at risk identified",
    "nothing noted",
    "routine check",
]


def _safe_pct(numerator: float, denominator: float) -> float:
    if denominator in (0, 0.0) or pd.isna(denominator):
        return 0.0
    return float(numerator) / float(denominator) * 100.0


def _normalise_text(text: Any) -> str:
    if pd.isna(text):
        return ""
    return re.sub(r"\s+", " ", str(text)).strip()


def _is_meaningful_text(value: Any) -> bool:
    text = _normalise_text(value).lower()
    if not text:
        return False
    if text in SEMANTIC_EMPTY_STRINGS:
        return False
    return True
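
# Illustrative behaviour of the helpers above:
#   _safe_pct(3, 12)               -> 25.0
#   _safe_pct(5, 0)                -> 0.0  (zero denominator is guarded, not an error)
#   _is_meaningful_text("  N/A ")  -> False (normalises to a semantic empty)
#   _is_meaningful_text("Guard rail missing near bay 4") -> True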


def _tokenise(text: str) -> list[str]:
    return re.findall(r"[a-zA-Z][a-zA-Z0-9/&'-]+", text.lower())


def _theme_matches(text: str) -> set[str]:
    tl = text.lower()
    return {
        theme for theme, keywords in AT_RISK_KEYWORDS.items()
        if any(kw in tl for kw in keywords)
    }
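
# Illustrative use of _theme_matches, assuming AT_RISK_KEYWORDS in config maps
# a theme such as "Working at height" to keywords like "ladder" and "harness"
# (the real keyword map lives in config.py and may differ):
#   _theme_matches("Harness not clipped on while using the ladder")
#   -> {"Working at height"}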


def _top_dict(series: pd.Series, limit: int = 10) -> dict[str, int]:
    if series.empty:
        return {}
    cleaned = series.dropna().astype(str).str.strip()
    cleaned = cleaned[cleaned.ne("") & cleaned.ne("nan")]
    return cleaned.value_counts().head(limit).to_dict()


def _pick_window_start(max_date: pd.Timestamp) -> pd.Timestamp:
    return (max_date.to_period("M") - (TWO_YEAR_WINDOW_MONTHS - 1)).to_timestamp()
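
# Illustrative window arithmetic, assuming TWO_YEAR_WINDOW_MONTHS == 24:
#   _pick_window_start(pd.Timestamp("2024-06-15")) -> Timestamp("2022-07-01"),
#   i.e. the 24 calendar months ending June 2024.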


def _build_quality_frame(se_window: pd.DataFrame) -> pd.DataFrame:
    """Create practical quality proxies for each Safety Energy record."""
    if se_window.empty:
        return se_window.copy()

    df = se_window.copy()
    text_cols = [c for c in QUALITY_TEXT_COLUMNS if c in df.columns]

    if text_cols:
        df["_text_blob"] = (
            df[text_cols]
            .fillna("")
            .astype(str)
            .agg(" ".join, axis=1)
            .map(_normalise_text)
        )
    else:
        df["_text_blob"] = ""

    df["_tokens"] = df["_text_blob"].map(_tokenise)
    df["_word_count"] = df["_tokens"].map(len)
    df["_unique_words"] = df["_tokens"].map(lambda toks: len(set(toks)))
    df["_contains_number"] = df["_text_blob"].str.contains(r"\d", regex=True, na=False)
    df["_theme_count"] = df["_text_blob"].map(lambda t: len(_theme_matches(t)))
    df["_action_words"] = df["_tokens"].map(lambda toks: len(set(toks) & ACTION_WORDS))
    df["_learning_words"] = df["_tokens"].map(lambda toks: len(set(toks) & LEARNING_WORDS))
    df["_reactive_words"] = df["_text_blob"].str.lower().map(
        lambda txt: sum(1 for word in REACTIVE_WORDS if word in txt)
    )
    df["_preventive_words"] = df["_text_blob"].str.lower().map(
        lambda txt: sum(1 for word in PREVENTIVE_WORDS if word in txt)
    )
    df["_generic_flag"] = (
        (df["_word_count"] <= 4)
        | df["_text_blob"].str.lower().map(lambda txt: any(p in txt for p in GENERIC_PATTERNS))
    )

    total_q = pd.to_numeric(df.get("total_questions"), errors="coerce").fillna(0)
    at_risk = pd.to_numeric(df.get("at_risk_aspects"), errors="coerce").fillna(0)
    actions = pd.to_numeric(df.get("actions"), errors="coerce").fillna(0)
    atl_actions = pd.to_numeric(df.get("atl_actions"), errors="coerce").fillna(0)

    base_fields = [c for c in INPUT_DEPTH_BASE_FIELDS if c in df.columns]
    optional_fields = [c for c in INPUT_DEPTH_OPTIONAL_FIELDS if c in df.columns]
    numeric_fields = [c for c in INPUT_DEPTH_NUMERIC_FIELDS if c in df.columns]

    if base_fields:
        df["_base_input_count"] = sum(df[col].map(_is_meaningful_text).astype(int) for col in base_fields)
    else:
        df["_base_input_count"] = 0

    if optional_fields:
        df["_optional_input_count"] = sum(df[col].map(_is_meaningful_text).astype(int) for col in optional_fields)
    else:
        df["_optional_input_count"] = 0

    # A numeric field counts as "present" when it carries a positive value.
    numeric_presence = [
        pd.to_numeric(df[col], errors="coerce").fillna(0).gt(0).astype(int)
        for col in numeric_fields
    ]
    df["_numeric_input_count"] = sum(numeric_presence) if numeric_presence else 0

    weighted_points = (
        df["_base_input_count"] * 1.0
        + df["_optional_input_count"] * 1.2
        + df["_numeric_input_count"] * 1.0
    )
    weighted_max = max(
        1.0,
        len(base_fields) * 1.0 + len(optional_fields) * 1.2 + len(numeric_fields) * 1.0,
    )
    df["input_depth_score"] = np.clip((weighted_points / weighted_max) * 100.0, 0, 100).round(1)
    df["input_depth_band"] = np.select(
        [
            df["input_depth_score"] >= 60,
            df["input_depth_score"] >= 40,
            df["input_depth_score"] >= 20,
        ],
        ["Rich", "Balanced", "Light"],
        default="Sparse",
    )

    df["_follow_up_signal"] = (
        (actions + atl_actions > 0)
        | df["_text_blob"].str.lower().str.contains("follow up|review|action|close out|escalat", regex=True, na=False)
    )
    df["_risk_signal"] = (
        (at_risk > 0)
        | df["_theme_count"].gt(0)
        | df["_text_blob"].str.lower().str.contains("critical risk|hazard|unsafe|control", regex=True, na=False)
    )
    df["_critical_control_signal"] = df.get(
        "Was a critical risk identified and controls verified as effective and in place?",
        pd.Series(index=df.index, dtype="object"),
    ).astype(str).str.lower().str.contains("yes|effective|verified", regex=True, na=False)

    norm_text = (
        df["_text_blob"].str.lower()
        .str.replace(r"[^a-z0-9 ]", " ", regex=True)
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
    )
    freq = norm_text[norm_text.ne("")].value_counts()
    df["_duplicate_flag"] = norm_text.map(freq).fillna(0).ge(3) & df["_word_count"].ge(5)

    richness = (
        np.where(df["_word_count"] >= 35, 22,
        np.where(df["_word_count"] >= 20, 18,
        np.where(df["_word_count"] >= 10, 12,
        np.where(df["_word_count"] >= 5, 6, 0))))
    )
    specificity = (
        np.where(df["_unique_words"] >= 18, 10, np.where(df["_unique_words"] >= 10, 6, 2))
        + np.where(df["_contains_number"], 4, 0)
        + np.where(df["_theme_count"] >= 2, 4, np.where(df["_theme_count"] == 1, 2, 0))
    )
    action_score = (
        np.where(actions + atl_actions >= 2, 12, np.where(actions + atl_actions == 1, 8, 0))
        + np.where(df["_action_words"] >= 2, 6, np.where(df["_action_words"] == 1, 3, 0))
    )
    learning_score = (
        np.where(df["_learning_words"] >= 2, 10, np.where(df["_learning_words"] == 1, 6, 0))
        + np.where(df["_text_blob"].str.lower().str.contains("best practice|lesson|learning|feedback", regex=True, na=False), 4, 0)
    )
    risk_score = (
        np.where(df["_risk_signal"], 8, 0)
        + np.where(at_risk >= 2, 6, np.where(at_risk == 1, 3, 0))
        + np.where(df["_critical_control_signal"], 4, 0)
        + np.where((total_q > 0) & ((at_risk / total_q.replace(0, np.nan)).fillna(0) >= 0.1), 2, 0)
    )
    follow_up_score = (
        np.where(df["_follow_up_signal"], 8, 0)
        + np.where(df["_text_blob"].str.lower().str.contains("close out|owner|due|monitor", regex=True, na=False), 4, 0)
    )
    penalty = (
        np.where(df["_generic_flag"], 10, 0)
        + np.where(df["_duplicate_flag"], 8, 0)
        + np.where((df["_word_count"] <= 6) & (~df["_follow_up_signal"]) & (~df["_risk_signal"]), 8, 0)
    )

    df["quality_score"] = np.clip(
        richness + specificity + action_score + learning_score + risk_score + follow_up_score - penalty,
        0, 100,
    ).astype(int)

    df["meaningful_flag"] = df["quality_score"] >= QUALITY_SCORE_BANDS["meaningful"]
    df["high_value_flag"] = df["quality_score"] >= QUALITY_SCORE_BANDS["high_value"]
    df["shallow_flag"] = df["quality_score"] <= QUALITY_SCORE_BANDS["shallow"]
    df["reactive_flag"] = (
        (df["_reactive_words"] > df["_preventive_words"])
        | ((actions + atl_actions > 0) & at_risk.gt(0))
    )
    df["preventive_flag"] = (
        (df["_preventive_words"] >= df["_reactive_words"])
        & df["_risk_signal"]
        & ~df["shallow_flag"]
    )
    df["repetitive_flag"] = df["_duplicate_flag"]

    def _band(score: int) -> str:
        if score >= QUALITY_SCORE_BANDS["high_value"]:
            return "High value"
        if score >= QUALITY_SCORE_BANDS["meaningful"]:
            return "Meaningful"
        if score <= QUALITY_SCORE_BANDS["shallow"]:
            return "Shallow"
        return "Mixed"

    df["quality_band"] = df["quality_score"].map(_band)
    return df
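
# Worked example of the scoring above (illustrative): a 25-word entry with
# 12 unique words, no numerals, one theme match, one recorded action and one
# action word, with no penalties, scores richness 18 + specificity (6 + 0 + 2)
# + action (8 + 3) = 37 before any learning, risk or follow-up points.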


def _summarise_quality_slice(df: pd.DataFrame) -> dict[str, Any]:
    if df.empty:
        return {
            "count": 0,
            "avg_quality": 0.0,
            "avg_input_depth": 0.0,
            "meaningful_pct": 0.0,
            "high_value_pct": 0.0,
            "shallow_pct": 0.0,
            "reactive_pct": 0.0,
            "preventive_pct": 0.0,
            "repetitive_pct": 0.0,
            "follow_up_pct": 0.0,
            "risk_signal_pct": 0.0,
            "rich_input_pct": 0.0,
        }

    return {
        "count": int(len(df)),
        "avg_quality": round(float(df["quality_score"].mean()), 1),
        "avg_input_depth": round(float(df["input_depth_score"].mean()), 1),
        "meaningful_pct": round(_safe_pct(df["meaningful_flag"].sum(), len(df)), 1),
        "high_value_pct": round(_safe_pct(df["high_value_flag"].sum(), len(df)), 1),
        "shallow_pct": round(_safe_pct(df["shallow_flag"].sum(), len(df)), 1),
        "reactive_pct": round(_safe_pct(df["reactive_flag"].sum(), len(df)), 1),
        "preventive_pct": round(_safe_pct(df["preventive_flag"].sum(), len(df)), 1),
        "repetitive_pct": round(_safe_pct(df["repetitive_flag"].sum(), len(df)), 1),
        "follow_up_pct": round(_safe_pct(df["_follow_up_signal"].sum(), len(df)), 1),
        "risk_signal_pct": round(_safe_pct(df["_risk_signal"].sum(), len(df)), 1),
        "rich_input_pct": round(_safe_pct((df["input_depth_band"] == "Rich").sum(), len(df)), 1),
    }


def _summarise_theme_trend(df: pd.DataFrame, recent_months: int = 6) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    if df.empty:
        return [], []

    theme_rows: list[dict[str, Any]] = []
    for _, row in df.iterrows():
        blob = row.get("_text_blob", "")
        matches = _theme_matches(blob)
        if not matches:
            module_name = _normalise_text(row.get("module_name", ""))
            if module_name:
                matches = {module_name}
        for theme in matches:
            theme_rows.append({"year_month": row["year_month"], "theme": theme})

    if not theme_rows:
        return [], []

    theme_df = pd.DataFrame(theme_rows)
    monthly = (
        theme_df.groupby(["theme", "year_month"]).size()
        .unstack(fill_value=0)
        .sort_index(axis=1)
    )
    recent_cols = monthly.columns[-recent_months:]
    prior_cols = monthly.columns[-(recent_months * 2):-recent_months]
    if len(recent_cols) == 0:
        return [], []

    rows: list[dict[str, Any]] = []
    for theme, vals in monthly.iterrows():
        recent_avg = float(vals[recent_cols].mean())
        prior_avg = float(vals[prior_cols].mean()) if len(prior_cols) else 0.0
        delta = recent_avg - prior_avg
        if delta == 0:
            continue
        rows.append({
            "theme": str(theme),
            "recent_avg": round(recent_avg, 2),
            "prior_avg": round(prior_avg, 2),
            "delta": round(delta, 2),
        })

    rising = sorted([r for r in rows if r["delta"] > 0], key=lambda r: (-r["delta"], -r["recent_avg"]))[:6]
    declining = sorted([r for r in rows if r["delta"] < 0], key=lambda r: (r["delta"], -r["recent_avg"]))[:6]
    return rising, declining
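
# Illustrative trend arithmetic: a theme averaging 4.0 mentions per month in
# the recent six months against 2.5 in the prior six yields delta = +1.5 and
# ranks as rising; the sign of delta is all that separates rising from
# declining here.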


def _input_depth_insights(df: pd.DataFrame) -> dict[str, Any]:
    if df.empty or "input_depth_score" not in df.columns:
        return {
            "correlation": None,
            "by_band": [],
            "note": "No input-depth insight available.",
        }

    corr = None
    if df["input_depth_score"].nunique() > 1 and df["quality_score"].nunique() > 1:
        corr = round(float(df["input_depth_score"].corr(df["quality_score"])), 2)

    band_order = ["Sparse", "Light", "Balanced", "Rich"]
    grouped = (
        df.groupby("input_depth_band")
        .agg(
            count=("quality_score", "size"),
            avg_quality=("quality_score", "mean"),
            meaningful_pct=("meaningful_flag", lambda s: _safe_pct(s.sum(), len(s))),
            high_value_pct=("high_value_flag", lambda s: _safe_pct(s.sum(), len(s))),
            shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))),
            avg_input_depth=("input_depth_score", "mean"),
        )
        .reset_index()
    )
    grouped["band_order"] = grouped["input_depth_band"].map({b: i for i, b in enumerate(band_order)})
    grouped = grouped.sort_values("band_order")
    by_band = [
        {
            "band": r["input_depth_band"],
            "count": int(r["count"]),
            "avg_input_depth": round(float(r["avg_input_depth"]), 1),
            "avg_quality": round(float(r["avg_quality"]), 1),
            "meaningful_pct": round(float(r["meaningful_pct"]), 1),
            "high_value_pct": round(float(r["high_value_pct"]), 1),
            "shallow_pct": round(float(r["shallow_pct"]), 1),
        }
        for _, r in grouped.iterrows()
    ]

    note = "Input depth appears usable as a supporting quality metric."
    if corr is None:
        note = "Input depth could not be tested reliably against quality because there was not enough variation."
    elif corr < 0.25:
        note = "Input depth is only weakly aligned with overall quality, so it should remain a secondary metric."
    elif corr < 0.5:
        note = "Input depth is moderately aligned with overall quality and is useful as a supporting metric."

    return {
        "correlation": corr,
        "by_band": by_band,
        "note": note,
    }


def _analyse_two_year_trends(
    se: pd.DataFrame,
    llc: pd.DataFrame,
    events: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Build a rolling two-year Safety Energy trend and quality view.

    Counts come from Safety Energy. Quality is inferred through practical
    proxies such as text richness, specificity, action/follow-up signals,
    hazard recognition, and repeated generic entries.
    """
    if se.empty:
        return {"note": "No Safety Energy data available for two-year trend analysis."}

    max_date = se["date"].max()
    window_start = _pick_window_start(max_date)
    requested_start = pd.Timestamp(start_date)
    se_window = se[(se["date"] >= window_start) & (se["date"] <= max_date)].copy()
    llc_window = llc[(llc["date"] >= window_start) & (llc["date"] <= max_date)].copy()
    events_window = events[(events["date"] >= window_start) & (events["date"] <= max_date)].copy()

    if se_window.empty:
        return {"note": "No Safety Energy records fall within the rolling two-year window."}

    quality_df = _build_quality_frame(se_window)
    all_months = pd.period_range(
        quality_df["date"].min().to_period("M"),
        quality_df["date"].max().to_period("M"),
        freq="M",
    )
    all_quarters = pd.period_range(
        quality_df["date"].min().to_period("Q"),
        quality_df["date"].max().to_period("Q"),
        freq="Q",
    )

    monthly_counts = (
        quality_df.groupby(["year_month", "activity_type"]).size()
        .unstack(fill_value=0)
        .reindex(all_months, fill_value=0)
    )
    quarterly_counts = (
        quality_df.assign(year_quarter=quality_df["date"].dt.to_period("Q"))
        .groupby(["year_quarter", "activity_type"]).size()
        .unstack(fill_value=0)
        .reindex(all_quarters, fill_value=0)
    )
    monthly_quality = (
        quality_df.groupby(["year_month", "activity_type"])["quality_score"].mean()
        .unstack(fill_value=np.nan)
        .reindex(all_months)
    )

    activity_insights: dict[str, Any] = {}
    quality_rows: list[dict[str, Any]] = []
    bu_snapshots: dict[str, list[dict[str, Any]]] = {}
    low_value_units: list[dict[str, Any]] = []
    depth_insights_by_type: dict[str, Any] = {}

    for atype in LEADING_ACTIVITY_TYPES:
        sub = quality_df[quality_df["activity_type"] == atype].copy()
        if sub.empty:
            continue

        module_topics = _top_dict(sub.get("module_name", pd.Series(dtype="object")), 8)
        text_topics = _top_dict(sub.get("llc_topic", pd.Series(dtype="object")), 8)
        sub_events = (
            events_window[events_window["business_unit"].isin(sub["business_unit"].dropna().unique())]
            if "business_unit" in events_window.columns else pd.DataFrame()
        )
        summary = _summarise_quality_slice(sub)
        depth_insights = _input_depth_insights(sub)
        summary.update({
            "top_modules": module_topics,
            "top_topics": text_topics,
            "avg_at_risk": round(float(pd.to_numeric(sub.get("at_risk_aspects"), errors="coerce").fillna(0).mean()), 2),
            "avg_actions": round(float(pd.to_numeric(sub.get("actions"), errors="coerce").fillna(0).mean()), 2),
        })
        depth_insights_by_type[atype] = depth_insights

        bu_rows: list[dict[str, Any]] = []
        if "business_unit" in sub.columns:
            grouped = (
                sub.groupby("business_unit")
                .agg(
                    count=("quality_score", "size"),
                    avg_quality=("quality_score", "mean"),
                    shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))),
                    high_value_pct=("high_value_flag", lambda s: _safe_pct(s.sum(), len(s))),
                    repetitive_pct=("repetitive_flag", lambda s: _safe_pct(s.sum(), len(s))),
                )
                .reset_index()
            )
            grouped = grouped[grouped["count"] >= 20].sort_values(["avg_quality", "count"], ascending=[False, False])
            bu_rows = [
                {
                    "business_unit": r["business_unit"],
                    "count": int(r["count"]),
                    "avg_quality": round(float(r["avg_quality"]), 1),
                    "shallow_pct": round(float(r["shallow_pct"]), 1),
                    "high_value_pct": round(float(r["high_value_pct"]), 1),
                    "repetitive_pct": round(float(r["repetitive_pct"]), 1),
                }
                for _, r in grouped.iterrows()
            ]
        bu_snapshots[atype] = bu_rows[:8]
        for row in bu_rows:
            if row["count"] >= 30 and row["shallow_pct"] >= 45:
                low_value_units.append({
                    "activity_type": atype,
                    "business_unit": row["business_unit"],
                    "count": row["count"],
                    "avg_quality": row["avg_quality"],
                    "shallow_pct": row["shallow_pct"],
                })

        quality_rows.append({
            "activity_type": atype,
            "count": summary["count"],
            "avg_quality": summary["avg_quality"],
            "avg_input_depth": summary["avg_input_depth"],
            "meaningful_pct": summary["meaningful_pct"],
            "high_value_pct": summary["high_value_pct"],
            "shallow_pct": summary["shallow_pct"],
            "preventive_pct": summary["preventive_pct"],
            "reactive_pct": summary["reactive_pct"],
            "repetitive_pct": summary["repetitive_pct"],
            "follow_up_pct": summary["follow_up_pct"],
            "rich_input_pct": summary["rich_input_pct"],
        })

        yoy = {}
        if len(sub["year"].dropna().unique()) >= 2:
            yearly = sub.groupby("year").agg(
                count=("quality_score", "size"),
                quality=("quality_score", "mean"),
                meaningful=("meaningful_flag", "mean"),
            ).sort_index()
            if len(yearly) >= 2:
                prev = yearly.iloc[-2]
                curr = yearly.iloc[-1]
                yoy = {
                    "count_change_pct": round(_safe_pct(curr["count"] - prev["count"], prev["count"]), 1),
                    "quality_change": round(float(curr["quality"] - prev["quality"]), 1),
                    "meaningful_change_pct": round((float(curr["meaningful"]) - float(prev["meaningful"])) * 100, 1),
                }

        activity_insights[atype] = {
            **summary,
            "top_modules": module_topics,
            "top_topics": text_topics,
            "business_units": bu_rows,
            "input_depth": depth_insights,
            "yoy": yoy,
        }

    overall_themes = Counter()
    for blob in quality_df["_text_blob"]:
        matches = _theme_matches(blob)
        for theme in matches:
            overall_themes[theme] += 1

    ccc_df = quality_df[quality_df["activity_type"] == "CCC"].copy()
    ccc_rising, ccc_declining = _summarise_theme_trend(ccc_df)
    overall_rising, overall_declining = _summarise_theme_trend(quality_df)

    high_volume_low_value = sorted(
        low_value_units,
        key=lambda r: (-r["count"], -r["shallow_pct"], r["avg_quality"]),
    )[:8]

    recurring_modules: list[dict[str, Any]] = []
    if "module_name" in ccc_df.columns:
        module_summary = (
            ccc_df.groupby("module_name")
            .agg(
                count=("quality_score", "size"),
                avg_quality=("quality_score", "mean"),
                repetitive_pct=("repetitive_flag", lambda s: _safe_pct(s.sum(), len(s))),
                shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))),
            )
            .reset_index()
            .sort_values("count", ascending=False)
        )
        recurring_modules = [
            {
                "module_name": r["module_name"],
                "count": int(r["count"]),
                "avg_quality": round(float(r["avg_quality"]), 1),
                "repetitive_pct": round(float(r["repetitive_pct"]), 1),
                "shallow_pct": round(float(r["shallow_pct"]), 1),
            }
            for _, r in module_summary.head(10).iterrows()
        ]

    monthly_mix = [
        {
            "period": str(period),
            **{atype: int(monthly_counts.loc[period, atype]) if atype in monthly_counts.columns else 0 for atype in LEADING_ACTIVITY_TYPES},
        }
        for period in all_months
    ]

    quality_monthly_rows = [
        {
            "period": str(period),
            **{
                atype: round(float(monthly_quality.loc[period, atype]), 1)
                if atype in monthly_quality.columns and pd.notna(monthly_quality.loc[period, atype]) else None
                for atype in LEADING_ACTIVITY_TYPES
            },
        }
        for period in all_months
    ]

    quarter_rows = [
        {
            "period": str(period),
            **{atype: int(quarterly_counts.loc[period, atype]) if atype in quarterly_counts.columns else 0 for atype in LEADING_ACTIVITY_TYPES},
        }
        for period in all_quarters
    ]

    seasonality = (
        quality_df.assign(month_name=quality_df["date"].dt.month_name())
        .groupby("month_name").size().sort_values(ascending=False)
    )
    overall_input_depth = _input_depth_insights(quality_df)

    executive_summary: list[str] = []
    ccc_summary = activity_insights.get("CCC", {})
    occ_summary = activity_insights.get("OCC", {})
    llc_summary = activity_insights.get("LLC", {})

    if ccc_summary:
        executive_summary.append(
            f"CCCs averaged a quality score of {ccc_summary.get('avg_quality', 0):.1f}/100 over the last "
            f"{len(all_months)} months, with {ccc_summary.get('shallow_pct', 0):.1f}% assessed as shallow "
            f"and {ccc_summary.get('high_value_pct', 0):.1f}% assessed as high value."
        )
    if high_volume_low_value:
        hv = high_volume_low_value[0]
        executive_summary.append(
            f"{hv['business_unit']} shows a high-volume / low-value pattern in {hv['activity_type']} activity: "
            f"{hv['count']} records with average quality {hv['avg_quality']:.1f} and {hv['shallow_pct']:.1f}% shallow entries."
        )
    if overall_rising:
        executive_summary.append(
            "Emerging Safety Energy themes in the recent six months include "
            f"{', '.join(r['theme'] for r in overall_rising[:3])}."
        )
    if llc_summary and occ_summary:
        stronger = "LLC" if llc_summary.get("avg_quality", 0) >= occ_summary.get("avg_quality", 0) else "OCC"
        executive_summary.append(
            f"{stronger} records currently show the strongest overall documentation quality among the three leading activity types."
        )
    if overall_input_depth.get("correlation") is not None:
        executive_summary.append(
            f"Input depth and quality are correlated at r = {overall_input_depth['correlation']:.2f}, indicating that fuller records are a useful supporting signal for activity quality."
        )

    leadership_focus: list[str] = []
    if ccc_summary.get("repetitive_pct", 0) >= 20:
        leadership_focus.append(
            "CCC records show a material level of repeated or duplicated wording, suggesting some checks may be drifting toward compliance-only completion."
        )
    if ccc_declining:
        leadership_focus.append(
            f"CCC focus on {', '.join(item['theme'] for item in ccc_declining[:3])} has reduced in the recent six months; confirm this is intentional rather than a blind spot."
        )
    if overall_declining:
        leadership_focus.append(
            f"Previously visible themes such as {', '.join(item['theme'] for item in overall_declining[:3])} are appearing less often in recorded activity narratives."
        )
    if ccc_summary.get("follow_up_pct", 0) < 35:
        leadership_focus.append(
            "A relatively low share of CCCs contain clear follow-up or close-out signals, which weakens the evidence that issues identified in checks are being converted into learning and action."
        )
    if overall_input_depth.get("correlation") is not None and overall_input_depth["correlation"] >= 0.4:
        leadership_focus.append(
            "Rows with richer input depth are materially more likely to read as meaningful records, so populated-field depth can be used as a practical early warning metric for declining quality."
        )
    if not leadership_focus:
        leadership_focus.append(
            "No dominant low-value pattern was detected across the full two-year window, but monthly quality should still be monitored for slippage."
        )

    recommendations = [
        "Use CCC quality, not just CCC count, as a leadership KPI. Track shallow-entry rate, follow-up rate, and repeated wording monthly.",
        "Review the highest-volume low-value Business Units with their leaders and sample the underlying records to confirm whether quality concerns are real or data-entry related.",
        "Push recurring CCC/OCC themes that show little improvement into board-level focus areas where repeated exposure is visible but learning evidence is weak.",
    ]

    return {
        "window_start": window_start.strftime("%Y-%m-%d"),
        "window_end": max_date.strftime("%Y-%m-%d"),
        "window_months": int(len(all_months)),
        "requested_start_date": requested_start.strftime("%Y-%m-%d"),
        "monthly_mix": monthly_mix,
        "quarterly_mix": quarter_rows,
        "monthly_quality": quality_monthly_rows,
        "quality_by_type": quality_rows,
        "activity_insights": activity_insights,
        "input_depth": overall_input_depth,
        "input_depth_by_type": depth_insights_by_type,
        "top_themes": dict(overall_themes.most_common(10)),
        "rising_themes": overall_rising,
        "declining_themes": overall_declining,
        "ccc_rising_themes": ccc_rising,
        "ccc_declining_themes": ccc_declining,
        "ccc_recurring_modules": recurring_modules,
        "high_volume_low_value": high_volume_low_value,
        "bu_quality_snapshots": bu_snapshots,
        "seasonality": {k: int(v) for k, v in seasonality.head(6).items()},
        "executive_summary": executive_summary,
        "leadership_focus": leadership_focus,
        "recommendations": recommendations,
        "proxy_note": (
            "Quality is inferred using practical proxies: richness and specificity of text, risk recognition, "
            "action/follow-up language, evidence of learning, input depth across useful fields, and penalties for generic or repeated wording. "
            "These scores indicate likely value, not definitive assurance."
        ),
        "note": (
            "The deeper Safety Energy analysis uses a rolling two-year window ending on the latest Safety Energy record. "
            "Counts come from Safety_Energy.xlsx; LLC_Data is used separately elsewhere for richer LLC theme detail."
        ),
        "_quality_df": quality_df,
        "_llc_window": llc_window,
    }


# ─────────────────────────────────────────────────────────────────────────────
# Data quality profiling
# ─────────────────────────────────────────────────────────────────────────────


def _profile_data_quality(
    events: pd.DataFrame,
    se: pd.DataFrame,
    llc: pd.DataFrame,
) -> dict[str, Any]:
    """
    Summarise row counts, date coverage, and null rates for key fields.
    """
    def _date_range(df: pd.DataFrame) -> tuple[str, str]:
        mn = df["date"].min()
        mx = df["date"].max()
        return (
            mn.strftime("%d %b %Y") if pd.notna(mn) else "N/A",
            mx.strftime("%d %b %Y") if pd.notna(mx) else "N/A",
        )

    def _null_pct(df: pd.DataFrame, col: str) -> str:
        if col not in df.columns:
            return "N/A"
        return f"{df[col].isna().mean() * 100:.1f}%"

    ev_range = _date_range(events)
    se_range = _date_range(se)
    llc_range = _date_range(llc)

    return {
        "events": {
            "rows": len(events),
            "date_from": ev_range[0],
            "date_to": ev_range[1],
            "null_event_type": _null_pct(events, "event_type"),
            "null_consequence": _null_pct(events, "consequence"),
            "null_business_unit": _null_pct(events, "business_unit"),
            "null_root_cause": _null_pct(events, "root_cause_cat"),
            "duplicate_ids": int(events["EventID"].duplicated().sum()) if "EventID" in events.columns else "N/A",
        },
        "safety_energy": {
            "rows": len(se),
            "date_from": se_range[0],
            "date_to": se_range[1],
            "type_breakdown": se["activity_type"].value_counts().to_dict() if "activity_type" in se else {},
            "null_leader": _null_pct(se, "leader"),
            "null_bu": _null_pct(se, "business_unit"),
        },
        "llc": {
            "rows": len(llc),
            "date_from": llc_range[0],
            "date_to": llc_range[1],
            "null_topic": _null_pct(llc, "topic"),
            "null_leader": _null_pct(llc, "leader"),
        },
    }


# ─────────────────────────────────────────────────────────────────────────────
# Events analysis
# ─────────────────────────────────────────────────────────────────────────────


def _parse_time_to_hour(value: Any) -> Optional[int]:
    if pd.isna(value):
        return None
    text = str(value).strip()
    if not text or text.lower() == "nan":
        return None
    parsed = pd.to_datetime(text, errors="coerce")
    if pd.notna(parsed):
        return int(parsed.hour)
    match = re.search(r"(\d{1,2}):(\d{2})", text)
    if match:
        return int(match.group(1))
    return None


def _time_bucket(hour: Optional[int]) -> str:
    if hour is None:
        return "Unknown"
    if 0 <= hour < 6:
        return "Night (00:00-05:59)"
    if 6 <= hour < 12:
        return "Morning (06:00-11:59)"
    if 12 <= hour < 18:
        return "Afternoon (12:00-17:59)"
    return "Evening (18:00-23:59)"
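
# Illustrative parse-and-bucket behaviour:
#   _parse_time_to_hour("14:35")   -> 14
#   _parse_time_to_hour("no time") -> None
#   _time_bucket(14)               -> "Afternoon (12:00-17:59)"
#   _time_bucket(None)             -> "Unknown"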


def _analyse_events(
    events: pd.DataFrame,
    start_date: str,
    split_date: str,
    pd1_name: str,
    pd2_name: str,
) -> dict[str, Any]:
    """Whole-of-period events view with serious-event, timing, and MVE insight."""

    df = events[events["date"] >= pd.Timestamp(start_date)].copy()
    if df.empty:
        return {"total": 0, "_df": df}

    def _pct(n: int, total: int) -> str:
        return f"{n / max(total, 1) * 100:.1f}%"

    def _cons_counts(sub: pd.DataFrame) -> dict[str, int]:
        if "consequence" not in sub:
            return {}
        return {c: int((sub["consequence"] == c).sum()) for c in CONSEQUENCE_ORDER}

    def _crp_counts(sub: pd.DataFrame) -> dict[str, int]:
        if "crp" not in sub:
            return {}
        vc = sub["crp"].dropna().astype(str).str.strip().value_counts()
        vc = vc[~vc.index.isin(["None Identified", "Under Investigation", "nan", ""])]
        return vc.head(10).to_dict()

    monthly_all = (
        df.groupby("year_month").size()
        .reindex(
            pd.period_range(df["date"].min().to_period("M"), df["date"].max().to_period("M"), freq="M"),
            fill_value=0,
        )
    )
    months = max(1, len(monthly_all))
    injury_class = df["injury_class"].value_counts().to_dict() if "injury_class" in df.columns else {}
    serious = df[df["consequence"].isin(CONSEQUENCE_SERIOUS)].copy() if "consequence" in df.columns else df.iloc[0:0]

    if "Time of Event" in serious.columns:
        serious["_event_hour"] = serious["Time of Event"].map(_parse_time_to_hour)
        serious["_time_bucket"] = serious["_event_hour"].map(_time_bucket)
        time_bucket_counts = serious["_time_bucket"].value_counts().to_dict()
        time_hour_counts = serious["_event_hour"].dropna().astype(int).value_counts().sort_index().to_dict()
        timed_serious = int(serious["_event_hour"].notna().sum())
    else:
        time_bucket_counts = {}
        time_hour_counts = {}
        timed_serious = 0

    motor = (
        df[df["event_type"].astype(str).str.contains("motor|vehicle|mva|traffic", case=False, na=False)].copy()
        if "event_type" in df.columns else df.iloc[0:0]
    )
    serious_motor = motor[motor["consequence"].isin(CONSEQUENCE_SERIOUS)].copy() if "consequence" in motor.columns else motor.iloc[0:0]

    return {
        "total": len(df),
        "date_from": df["date"].min().strftime("%d %b %Y"),
        "date_to": df["date"].max().strftime("%d %b %Y"),
        "months": months,
        "events_per_month": round(len(df) / months, 1),
        "serious_count": int(len(serious)),
        "serious_pct": _pct(len(serious), len(df)),
        "lti_count": int(injury_class.get("Lost Time Injury", 0)),
        "fai_count": int(injury_class.get("First Aid Treatment", 0)),
        "event_type_counts": df["event_type"].value_counts().to_dict() if "event_type" in df.columns else {},
        "consequence_counts": _cons_counts(df),
        "crp_counts": _crp_counts(df),
        "root_cause_counts": df["root_cause_cat"].value_counts().head(10).to_dict() if "root_cause_cat" in df.columns else {},
        "serious_projects": serious["project"].value_counts().head(8).to_dict() if "project" in serious.columns else {},
        "serious_locations": serious["location"].value_counts().head(8).to_dict() if "location" in serious.columns else {},
        "serious_bus": serious["business_unit"].value_counts().head(8).to_dict() if "business_unit" in serious.columns else {},
        "serious_time_buckets": time_bucket_counts,
        "serious_time_hours": {str(k): int(v) for k, v in time_hour_counts.items()},
        "serious_time_coverage_pct": round(_safe_pct(timed_serious, len(serious)), 1) if len(serious) else 0.0,
        "motor_vehicle": {
            "count": int(len(motor)),
            "pct_total": round(_safe_pct(len(motor), len(df)), 1),
            "serious_count": int(len(serious_motor)),
            "serious_pct_within_mve": round(_safe_pct(len(serious_motor), len(motor)), 1) if len(motor) else 0.0,
            "consequence_counts": _cons_counts(motor),
            "top_projects": motor["project"].value_counts().head(8).to_dict() if "project" in motor.columns else {},
            "top_locations": motor["location"].value_counts().head(8).to_dict() if "location" in motor.columns else {},
            "road_types": _top_dict(motor.get("Road Type", pd.Series(dtype="object")), 6),
            "conditions": _top_dict(motor.get("Road Conditions", pd.Series(dtype="object")), 6),
            "vehicle_types": _top_dict(motor.get("Type of vehicle involved", pd.Series(dtype="object")), 6),
        },
        "monthly_all": {str(k): int(v) for k, v in monthly_all.items()},
        "_df": df,
        "_serious": serious,
        "_motor": motor,
    }


# ─────────────────────────────────────────────────────────────────────────────
# Leading activity analysis
# ─────────────────────────────────────────────────────────────────────────────


def _analyse_leading(
    se: pd.DataFrame,
    llc: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Summarise leading activities from Safety Energy + LLC Data.

    LLC_Data is used for its richer free-text (topic, CRP focus, observations).
    Safety Energy provides the authoritative counts for all three activity types.
    """
    se_f = se[se["date"] >= pd.Timestamp(start_date)].copy()

    # Monthly counts by activity type
    monthly_by_type: dict[str, dict[str, int]] = {}
    all_months = pd.period_range(se_f["date"].min().to_period("M"),
                                 se_f["date"].max().to_period("M"), freq="M")

    for atype in LEADING_ACTIVITY_TYPES:
        sub = se_f[se_f["activity_type"] == atype]
        monthly = sub.groupby("year_month").size().reindex(all_months, fill_value=0)
        monthly_by_type[atype] = {str(k): int(v) for k, v in monthly.items()}

    # BU breakdown
    bu_by_type: dict[str, dict[str, int]] = {}
    for atype in LEADING_ACTIVITY_TYPES:
        sub = se_f[se_f["activity_type"] == atype]
        if "business_unit" in sub:
            bu_by_type[atype] = sub["business_unit"].value_counts().to_dict()

    # Top leaders (LLC only, from LLC_Data for richer detail)
    llc_f = llc[llc["date"] >= pd.Timestamp(start_date)].copy()
    top_leaders: dict[str, int] = {}
    if "leader" in llc_f:
        top_leaders = (
            llc_f["leader"].dropna().value_counts()
            .head(15).to_dict()
        )

    # LLC topics
    top_topics: dict[str, int] = {}
    if "topic" in llc_f:
        top_topics = (
            llc_f["topic"].dropna()
            .str.strip()
            .value_counts()
            .head(15).to_dict()
        )

    # CRP focus areas in LLCs
    crp_focus: dict[str, int] = {}
    if "crp_focus" in llc_f:
        crp_focus = (
            llc_f["crp_focus"].dropna()
            .str.strip()
            .value_counts()
            .head(10).to_dict()
        )

    # At-risk flags from LLC_Data
    at_risk_total = 0
    if "at_risk_flag" in llc_f:
        at_risk_total = int(llc_f["at_risk_flag"].sum())

    # Overall totals
    totals = se_f["activity_type"].value_counts().to_dict()

    # Average at-risk aspects per activity
    avg_at_risk: dict[str, float] = {}
    if "at_risk_aspects" in se_f:
        for atype in LEADING_ACTIVITY_TYPES:
            sub = se_f[se_f["activity_type"] == atype]
            val = sub["at_risk_aspects"].mean()
            avg_at_risk[atype] = round(float(val), 2) if pd.notna(val) else 0.0

    # Monthly total (all types combined)
    monthly_total = (
        se_f.groupby("year_month").size()
        .reindex(all_months, fill_value=0)
    )

    # Trend direction: mean of the last 6 months vs the 6 months before that
    def _trend_dir(series: pd.Series) -> str:
        if len(series) < 4:
            return "insufficient data"
        recent = series.iloc[-min(6, len(series)):]
        # With a full 12 months, compare against the immediately prior 6 months;
        # otherwise fall back to whatever history precedes the recent window.
        prior = series.iloc[-12:-6] if len(series) >= 12 else series.iloc[:max(1, len(series) - 6)]
        if prior.mean() == 0:
            return "no prior baseline"
        change = (recent.mean() - prior.mean()) / prior.mean() * 100
        if change > 10:
            return f"increasing (+{change:.0f}%)"
        elif change < -10:
            return f"declining ({change:.0f}%)"
        return f"stable ({change:+.0f}%)"

    activity_trend = _trend_dir(monthly_total)

    return {
        "totals": totals,
        "monthly_by_type": monthly_by_type,
        "monthly_total": {str(k): int(v) for k, v in monthly_total.items()},
        "bu_by_type": bu_by_type,
        "top_leaders": top_leaders,
        "top_topics": top_topics,
        "crp_focus": crp_focus,
        "at_risk_total_llc": at_risk_total,
        "avg_at_risk": avg_at_risk,
        "activity_trend": activity_trend,
        "all_months": [str(m) for m in all_months],
        "_se_f": se_f,
        "_llc_f": llc_f,
    }
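
# Illustrative classification from _trend_dir above: a recent six-month mean
# of 44 activities against a prior six-month mean of 40 is a +10% change and
# is reported as "stable (+10%)"; only changes beyond the +/-10% band are
# reported as increasing or declining.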


# ─────────────────────────────────────────────────────────────────────────────
# Effectiveness analysis
# ─────────────────────────────────────────────────────────────────────────────


def _analyse_effectiveness(
    events_result: dict,
    leading_result: dict,
) -> dict[str, Any]:
    """
    Assess whether leading activities appear associated with event outcomes.

    Uses Business Unit as the common grouping dimension.
    All language is deliberately cautious (associated with, may indicate).
    """
    ev_df = events_result.get("_df")
    se_f = leading_result.get("_se_f")

    if ev_df is None or se_f is None or "business_unit" not in ev_df.columns:
        return {"note": "Insufficient data for effectiveness analysis."}

    # BU-level: total leading activities vs total events
    bu_activities = se_f.groupby("business_unit").size().rename("activities")
    bu_events = ev_df.groupby("business_unit").size().rename("events")

    bu_table = pd.concat([bu_activities, bu_events], axis=1).fillna(0)
    bu_table.columns = ["activities", "events"]
    bu_table = bu_table[bu_table["activities"] > 0].sort_values("activities", ascending=False)

    # Monthly correlation: do months with more activities also see more (or
    # fewer) events? Counts are compared within the same calendar month.
    monthly_acts = se_f.groupby("year_month").size()
    monthly_events = ev_df.groupby("year_month").size()

    common_months = monthly_acts.index.intersection(monthly_events.index)
    corr_value: Optional[float] = None
    corr_note = "Insufficient overlapping months for correlation analysis."

    if len(common_months) >= CORR_MIN_MONTHS:
        a_vals = monthly_acts.reindex(common_months, fill_value=0).values
        e_vals = monthly_events.reindex(common_months, fill_value=0).values
        if np.std(a_vals) > 0 and np.std(e_vals) > 0:
            corr_value = float(np.corrcoef(a_vals, e_vals)[0, 1])
            direction = "positive" if corr_value > 0 else "negative"
            strength = "weak" if abs(corr_value) < 0.3 else ("moderate" if abs(corr_value) < 0.6 else "strong")
            corr_note = (
                f"A {strength} {direction} association (r = {corr_value:.2f}) was observed "
                f"between monthly leading-activity counts and monthly event counts across "
                f"{len(common_months)} overlapping months. "
                + ("This may warrant further review — high activity volumes and high event rates "
                   "in the same periods could reflect reactive activity rather than prevention."
                   if corr_value > 0.3 else
                   "A negative association is consistent with leading activities having a "
                   "preventive effect, though causation cannot be assumed from this data alone."
                   if corr_value < -0.3 else
                   "No strong directional association was identified.")
            )

    # BUs with high activity AND high events (possible reactive pattern)
    high_both: list[str] = []
    high_acts_low_events: list[str] = []

    if len(bu_table) >= 2:
        act_median = bu_table["activities"].median()
        evt_median = bu_table["events"].median()
        for bu, row in bu_table.iterrows():
            if row["activities"] >= act_median and row["events"] >= evt_median:
                high_both.append(str(bu))
            elif row["activities"] >= act_median and row["events"] < evt_median:
                high_acts_low_events.append(str(bu))

    return {
        "bu_table": bu_table.reset_index().to_dict("records"),
        "corr_value": corr_value,
        "corr_note": corr_note,
        "high_activity_high_events": high_both,
        "high_activity_low_events": high_acts_low_events,
        "note": (
            "Effectiveness analysis uses business unit-level and monthly aggregates. "
            "All associations are indicative only — correlation does not imply causation."
        ),
    }
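
# Illustrative reading of corr_note: 18 overlapping months with r = -0.42
# falls in the moderate band (0.3 <= |r| < 0.6) and, being below -0.3, is
# worded as consistent with prevention while explicitly declining any
# causal claim.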
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
# At-risk behaviour analysis
|
|||
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _extract_at_risk_themes(
|
|||
|
|
events: pd.DataFrame,
|
|||
|
|
se: pd.DataFrame,
|
|||
|
|
llc: pd.DataFrame,
|
|||
|
|
start_date: str,
|
|||
|
|
) -> dict[str, Any]:
|
|||
|
|
"""
|
|||
|
|
Extract at-risk behaviour themes using keyword matching against free-text
|
|||
|
|
fields in LLC_Data, Safety Energy, and Events.
|
|||
|
|
|
|||
|
|
No cloud APIs; all processing is local.
|
|||
|
|
"""
|
|||
|
|
ev_f = events[events["date"] >= pd.Timestamp(start_date)]
|
|||
|
|
llc_f = llc[llc["date"] >= pd.Timestamp(start_date)]
|
|||
|
|
se_f = se[se["date"] >= pd.Timestamp(start_date)]
|
|||
|
|
|
|||
|
|
# Collect text blobs from each source
|
|||
|
|
ev_texts = _collect_text(ev_f, ["brief_desc", "event_desc", "root_cause_cat"])
|
|||
|
|
llc_texts = _collect_text(llc_f, ["topic", "at_risk_obs", "crp_focus"])
|
|||
|
|
se_texts = _collect_text(se_f, ["llc_topic", "at_risk_obs"])
|
|||
|
|
|
|||
|
|
def _score(texts: list[str]) -> dict[str, int]:
|
|||
|
|
counts: Counter = Counter()
|
|||
|
|
for text in texts:
|
|||
|
|
tl = text.lower()
|
|||
|
|
for theme, keywords in AT_RISK_KEYWORDS.items():
|
|||
|
|
if any(kw in tl for kw in keywords):
|
|||
|
|
counts[theme] += 1
|
|||
|
|
return dict(counts.most_common())
|
|||
|
|
|
|||
|
|
ev_themes = _score(ev_texts)
|
|||
|
|
llc_themes = _score(llc_texts)
|
|||
|
|
se_themes = _score(se_texts)
|
|||
|
|
|
|||
|
|
# Combine: weight events × 2 (lagging = higher severity signal)
|
|||
|
|
combined: Counter = Counter()
|
|||
|
|
for theme, cnt in ev_themes.items():
|
|||
|
|
combined[theme] += cnt * 2
|
|||
|
|
for theme, cnt in llc_themes.items():
|
|||
|
|
combined[theme] += cnt
|
|||
|
|
for theme, cnt in se_themes.items():
|
|||
|
|
combined[theme] += cnt
|
|||
|
|
|
|||
|
|
# Alignment gap: themes prominent in events but absent in LLC discussions
|
|||
|
|
llc_top = set(list(llc_themes.keys())[:5])
|
|||
|
|
events_top = set(list(ev_themes.keys())[:5])
|
|||
|
|
gap_themes = events_top - llc_top
|
|||
|
|
|
|||
|
|
# Top LLC topics (free text)
|
|||
|
|
top_llc_topics: dict[str, int] = {}
|
|||
|
|
if "topic" in llc_f.columns:
|
|||
|
|
top_llc_topics = llc_f["topic"].dropna().value_counts().head(10).to_dict()
|
|||
|
|
|
|||
|
|
# CRP focus in LLCs
|
|||
|
|
top_crp_focus: dict[str, int] = {}
|
|||
|
|
if "crp_focus" in llc_f.columns:
|
|||
|
|
top_crp_focus = llc_f["crp_focus"].dropna().value_counts().head(8).to_dict()
|
|||
|
|
|
|||
|
|
return {
|
|||
|
|
"event_themes": ev_themes,
|
|||
|
|
"llc_themes": llc_themes,
|
|||
|
|
"combined_themes": dict(combined.most_common(10)),
|
|||
|
|
"gap_themes": list(gap_themes),
|
|||
|
|
"top_llc_topics": top_llc_topics,
|
|||
|
|
"top_crp_focus": top_crp_focus,
|
|||
|
|
"note": (
|
|||
|
|
"Theme extraction uses keyword matching against free-text fields. "
|
|||
|
|
"Results are indicative; manual review of underlying records is recommended "
|
|||
|
|
"before drawing firm conclusions."
|
|||
|
|
),
|
|||
|
|
}
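
# Keyword scoring example (illustrative; assumes AT_RISK_KEYWORDS contains an
# entry such as {"Working at height": ["ladder", "scaffold"]} — the real mapping
# lives in config). The inner _score helper would then map:
#   _score(["Ladder slipped on wet surface"]) -> {"Working at height": 1}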


def _collect_text(df: pd.DataFrame, cols: list[str]) -> list[str]:
    """Collect non-null text entries from named columns."""
    texts = []
    for col in cols:
        if col in df.columns:
            texts.extend(df[col].dropna().astype(str).str.strip().tolist())
    return texts
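
# Example (REPL-style, hypothetical frame; missing columns are skipped):
#     >>> _collect_text(pd.DataFrame({"topic": [" PPE ", None]}), ["topic", "missing"])
#     ['PPE']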


def _compare_dimension(
    events_df: pd.DataFrame,
    se_df: pd.DataFrame,
    dimension: str,
    min_activities: int = 10,
) -> dict[str, Any]:
    if dimension not in events_df.columns or dimension not in se_df.columns:
        return {"table": [], "best": [], "watch": []}

    serious = (
        events_df[events_df["consequence"].isin(CONSEQUENCE_SERIOUS)].copy()
        if "consequence" in events_df.columns
        else events_df.iloc[0:0]
    )
    activities = se_df.groupby(dimension).size().rename("activities")
    events = events_df.groupby(dimension).size().rename("events")
    serious_events = serious.groupby(dimension).size().rename("serious_events")
    comp = pd.concat([activities, events, serious_events], axis=1).fillna(0)
    if comp.empty:
        return {"table": [], "best": [], "watch": []}

    comp = comp.astype(int)
    comp["activity_event_ratio"] = comp.apply(
        lambda r: round(r["activities"] / r["events"], 1) if r["events"] > 0 else None,
        axis=1,
    )

    def _rows(df: pd.DataFrame, label: str) -> list[dict[str, Any]]:
        rows = []
        for _, row in df.reset_index().iterrows():
            ratio = row["activity_event_ratio"]
            rows.append({
                label: row[label],
                "activities": int(row["activities"]),
                "events": int(row["events"]),
                "serious_events": int(row["serious_events"]),
                "activity_event_ratio": None if pd.isna(ratio) else ratio,
            })
        return rows

    # "Best" requires a minimum activity volume so that quiet areas with no data
    # do not top the list; "watch" is the inverse ranking by harm signal.
    best = (
        comp[comp["activities"] >= min_activities]
        .sort_values(["serious_events", "events", "activities"], ascending=[True, True, False])
        .head(8)
    )
    watch = comp.sort_values(
        ["serious_events", "events", "activities"], ascending=[False, False, False]
    ).head(8)

    return {
        "table": _rows(comp.sort_values(["activities", "events"], ascending=[False, False]).head(25), dimension),
        "best": _rows(best, dimension),
        "watch": _rows(watch, dimension),
    }
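
# Return shape (illustrative, hypothetical values; "Depot A" is a made-up name):
#   {"table": [{"project": "Depot A", "activities": 40, "events": 3,
#               "serious_events": 0, "activity_event_ratio": 13.3}, ...],
#    "best": [...], "watch": [...]}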


# ─────────────────────────────────────────────────────────────────────────────
# Safety Energy ↔ Events relationship analysis
# ─────────────────────────────────────────────────────────────────────────────


def _analyse_se_events_relationship(
    events: pd.DataFrame,
    se: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Compare monthly Safety Energy activity levels against Events, overall and
    by Business Unit. Uses cautious associative language throughout.
    """
    ev_f = events[events["date"] >= pd.Timestamp(start_date)].copy()
    se_f = se[se["date"] >= pd.Timestamp(start_date)].copy()

    # Build common month range
    all_dates = pd.concat([ev_f["date"], se_f["date"]])
    if all_dates.empty:
        return {"note": "No overlapping data to compare."}

    start = all_dates.min().to_period("M")
    end = all_dates.max().to_period("M")
    all_months = pd.period_range(start, end, freq="M")

    monthly_acts = se_f.groupby("year_month").size().reindex(all_months, fill_value=0)
    monthly_events = ev_f.groupby("year_month").size().reindex(all_months, fill_value=0)

    # Trend divergence: periods where events spike but activities don't
    spike_months: list[str] = []
    if len(monthly_events) >= 3:
        ev_mean = monthly_events.mean()
        ev_std = monthly_events.std()
        for period, ev_count in monthly_events.items():
            if ev_count > ev_mean + ev_std:
                act_count = monthly_acts.get(period, 0)
                if act_count < monthly_acts.mean():
                    spike_months.append(str(period))
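
    # Spike rule, worked through (illustrative numbers): with a monthly event mean
    # of 4 and std of 2, a month with 7 events exceeds mean + std and is flagged,
    # but only when that month's activity count also sits below the activity mean.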

    # BU comparison table
    bu_comp: list[dict] = []
    if "business_unit" in ev_f and "business_unit" in se_f:
        bu_acts = se_f.groupby("business_unit").size().rename("activities")
        bu_events = ev_f.groupby("business_unit").size().rename("events")
        merged = pd.concat([bu_acts, bu_events], axis=1).fillna(0).astype(int)

        # Compute activity-to-event ratio where events > 0
        merged["ratio"] = merged.apply(
            lambda r: round(r["activities"] / r["events"], 1) if r["events"] > 0 else None,
            axis=1,
        )
        bu_comp = merged.reset_index().rename(columns={"index": "business_unit"}).to_dict("records")

    # LLC topic alignment vs event root causes
    llc_top_topics: list[str] = []
    ev_top_rc: list[str] = []
    if "activity_type" in se_f.columns:  # guard the column the filter below actually needs
        llc_sub = se_f[se_f["activity_type"] == "LLC"]
        llc_top_topics = llc_sub["llc_topic"].dropna().value_counts().head(5).index.tolist() if "llc_topic" in llc_sub else []
    if "root_cause_cat" in ev_f:
        ev_top_rc = ev_f["root_cause_cat"].dropna().value_counts().head(5).index.tolist()

    return {
        "monthly_acts": {str(k): int(v) for k, v in monthly_acts.items()},
        "monthly_events": {str(k): int(v) for k, v in monthly_events.items()},
        "spike_months": spike_months,
        "bu_comparison": bu_comp,
        "project_comparison": _compare_dimension(ev_f, se_f, "project", min_activities=12),
        "location_comparison": _compare_dimension(ev_f, se_f, "location", min_activities=10),
        "llc_top_topics": llc_top_topics,
        "ev_top_rc": ev_top_rc,
        "alignment_note": (
            "LLC topic focus and event root causes are compared to identify alignment gaps. "
            "Where event root causes diverge from LLC discussion topics, this may indicate "
            "that leading activity conversations are not yet targeting the highest-risk themes."
        ),
        "note": (
            "Monthly comparison covers periods where both datasets have data. "
            "Short overlapping periods reduce the reliability of any trend observations. "
            "This analysis is associative only."
        ),
    }
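
# Monthly dict shape for reference (keys are str(pd.Period), values are counts;
# the numbers here are illustrative only):
#   {"2024-01": 12, "2024-02": 9, ...}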


# ─────────────────────────────────────────────────────────────────────────────
# Leader and BU focus areas
# ─────────────────────────────────────────────────────────────────────────────


def _analyse_focus_areas(
    events: pd.DataFrame,
    se: pd.DataFrame,
    llc: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Identify Business Units and leaders warranting leadership attention,
    based on activity volumes, event rates, and declining trends.
    """
    ev_f = events[events["date"] >= pd.Timestamp(start_date)]
    se_f = se[se["date"] >= pd.Timestamp(start_date)]
    llc_f = llc[llc["date"] >= pd.Timestamp(start_date)]

    # BU-level activity counts and event counts
    bu_acts: dict[str, int] = {}
    bu_evts: dict[str, int] = {}
    if "business_unit" in se_f:
        bu_acts = se_f.groupby("business_unit").size().to_dict()
    if "business_unit" in ev_f:
        bu_evts = ev_f.groupby("business_unit").size().to_dict()

    all_bus = sorted(set(list(bu_acts.keys()) + list(bu_evts.keys())))
    bu_summary = [
        {
            "business_unit": bu,
            "activities": bu_acts.get(bu, 0),
            "events": bu_evts.get(bu, 0),
        }
        for bu in all_bus
    ]

    # Most active leaders (top 20 kept for reporting), plus leaders recording
    # fewer than LEADER_MIN_ACTIVITIES activities (gap indicator). The gap scan
    # runs over all leaders, not just the top-20 reporting list.
    leader_counts: dict[str, int] = {}
    low_activity_leaders: list[str] = []
    if "leader" in se_f:
        all_leader_counts = se_f["leader"].dropna().value_counts()
        leader_counts = all_leader_counts.head(20).to_dict()
        low_activity_leaders = [
            str(name) for name, cnt in all_leader_counts.items()
            if cnt < LEADER_MIN_ACTIVITIES
        ]

    # Declining BUs: compare first half vs second half of date range
    declining_bus: list[str] = []
    if "business_unit" in se_f and len(se_f) > 0:
        mid = se_f["date"].min() + (se_f["date"].max() - se_f["date"].min()) / 2
        for bu in all_bus:
            sub = se_f[se_f["business_unit"] == bu]
            if len(sub) < 4:
                continue
            early = len(sub[sub["date"] <= mid])
            late = len(sub[sub["date"] > mid])
            if late < early * 0.7:
                declining_bus.append(bu)
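
    # Decline rule, worked through (illustrative): a BU with 10 activities in the
    # first half of the window is flagged if it records fewer than 7 (10 * 0.7)
    # in the second half.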

    return {
        "bu_summary": bu_summary,
        "leader_counts": leader_counts,
        "low_activity_leaders": low_activity_leaders,
        "declining_bus": declining_bus,
    }
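
# Focus-areas payload shape (illustrative; "Transport" is a hypothetical BU name):
#   {"bu_summary": [{"business_unit": "Transport", "activities": 41, "events": 6}, ...],
#    "leader_counts": {...}, "low_activity_leaders": [...], "declining_bus": [...]}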


# ─────────────────────────────────────────────────────────────────────────────
# Chart generation
# ─────────────────────────────────────────────────────────────────────────────


def _generate_charts(
    events_res: dict,
    leading_res: dict,
    se_ev_res: dict,
    at_risk_res: dict,
    trends_res: dict,
    output_dir: str,
    pd1_name: str,
    pd2_name: str,
    split_date: str,
) -> dict[str, str]:
    """Generate all charts and return a dict of name → file path."""
    # pd1_name, pd2_name, and split_date are accepted for backwards compatibility
    # with older callers but are not used by the sector-wide charts.
    _setup_style()
    charts: dict[str, str] = {}
    os.makedirs(output_dir, exist_ok=True)

    # ── 1. Events monthly trend ──────────────────────────────────────────────
    try:
        ev_df = events_res.get("_df")
        if ev_df is not None and len(ev_df) > 0:
            all_months = pd.period_range(
                ev_df["date"].min().to_period("M"),
                ev_df["date"].max().to_period("M"), freq="M",
            )
            monthly = ev_df.groupby("year_month").size().reindex(all_months, fill_value=0)
            x = range(len(all_months))
            labels = _month_labels(all_months)

            fig, ax = plt.subplots(figsize=(11, 4))
            vals = monthly.values
            ax.bar(x, vals, color=DEEP_BLUE, width=0.72, alpha=0.9)
            rolling = monthly.rolling(3, min_periods=1).mean().values
            ax.plot(x, rolling, color=SKY_BLUE, linewidth=2.2, marker="o", markersize=3, label="3-month average")

            ax.set_xticks(x)
            ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
            ax.set_title("Monthly Events", fontsize=14,
                         fontweight="bold", color=DEEP_BLUE)
            ax.set_ylabel("Events")
            ax.legend(loc="upper right", fontsize=9)
            p = os.path.join(output_dir, "ch_events_monthly.png")
            _save(fig, p)
            charts["events_monthly"] = p
    except Exception as e:
        log.warning("Chart events_monthly failed: %s", e)

    # ── 2. Leading activities monthly trend (stacked bars) ───────────────────
    try:
        monthly_by_type = leading_res.get("monthly_by_type", {})
        all_months_str = leading_res.get("all_months", [])
        if all_months_str and any(monthly_by_type.values()):
            months_idx = [pd.Period(m) for m in all_months_str]
            x = range(len(months_idx))
            labels = _month_labels(pd.PeriodIndex(months_idx))

            fig, ax = plt.subplots(figsize=(11, 4))
            bottom = np.zeros(len(months_idx))
            for atype in LEADING_ACTIVITY_TYPES:
                vals = np.array([monthly_by_type.get(atype, {}).get(m, 0) for m in all_months_str])
                ax.bar(x, vals, bottom=bottom, color=ACTIVITY_COLOURS[atype],
                       label=atype, width=0.8, alpha=0.9)
                bottom += vals

            ax.set_xticks(x)
            ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
            ax.set_title("Monthly Leading Activities (LLC / CCC / OCC)",
                         fontsize=14, fontweight="bold", color=DEEP_BLUE)
            ax.set_ylabel("Count")
            ax.legend(loc="upper right", fontsize=9)
            p = os.path.join(output_dir, "ch_leading_monthly.png")
            _save(fig, p)
            charts["leading_monthly"] = p
    except Exception as e:
        log.warning("Chart leading_monthly failed: %s", e)

    # ── 3. Activity type mix (pie) ────────────────────────────────────────────
    try:
        totals = leading_res.get("totals", {})
        if totals:
            labels_d = list(totals.keys())
            vals_d = list(totals.values())
            colours = [ACTIVITY_COLOURS.get(name, MUTED) for name in labels_d]
            fig, ax = plt.subplots(figsize=(5, 4))
            wedges, _, autotexts = ax.pie(
                vals_d, labels=labels_d, autopct="%1.0f%%",
                colors=colours, startangle=140,
                wedgeprops={"linewidth": 1, "edgecolor": "white"},
            )
            for at in autotexts:
                at.set_fontsize(9)
            ax.set_title("Activity Type Mix", fontsize=13, fontweight="bold", color=DEEP_BLUE)
            p = os.path.join(output_dir, "ch_activity_mix.png")
            _save(fig, p)
            charts["activity_mix"] = p
    except Exception as e:
        log.warning("Chart activity_mix failed: %s", e)

    # ── 4. BU comparison: activities vs events ────────────────────────────────
    try:
        bu_comp = se_ev_res.get("bu_comparison", [])
        if bu_comp:
            df_bu = pd.DataFrame(bu_comp).set_index("business_unit")
            df_bu = df_bu[["activities", "events"]].sort_values("activities", ascending=True)
            y = range(len(df_bu))
            fig, ax = plt.subplots(figsize=(9, max(3, len(df_bu) * 0.6)))
            ax.barh([i - 0.2 for i in y], df_bu["activities"].values,
                    height=0.35, color=DEEP_BLUE, label="Activities")
            ax.barh([i + 0.2 for i in y], df_bu["events"].values,
                    height=0.35, color=RED, label="Events")
            ax.set_yticks(list(y))
            ax.set_yticklabels(df_bu.index.tolist(), fontsize=9)
            ax.set_title("Activities vs Events by Business Unit",
                         fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.legend(fontsize=9)
            p = os.path.join(output_dir, "ch_bu_comparison.png")
            _save(fig, p)
            charts["bu_comparison"] = p
    except Exception as e:
        log.warning("Chart bu_comparison failed: %s", e)

    # ── 5. Dual-axis: monthly activities and events overlay ───────────────────
    try:
        m_acts = se_ev_res.get("monthly_acts", {})
        m_events = se_ev_res.get("monthly_events", {})
        if m_acts and m_events:
            all_keys = sorted(set(m_acts) | set(m_events))
            all_p = pd.PeriodIndex([pd.Period(k) for k in all_keys])
            x = range(len(all_p))
            acts_vals = [m_acts.get(k, 0) for k in all_keys]
            event_vals = [m_events.get(k, 0) for k in all_keys]
            labels_m = _month_labels(all_p)

            fig, ax1 = plt.subplots(figsize=(11, 4))
            ax2 = ax1.twinx()
            ax1.bar(x, acts_vals, color=DEEP_BLUE, alpha=0.6, label="Leading Activities", width=0.6)
            ax2.plot(x, event_vals, color=RED, linewidth=2, marker="o", markersize=4, label="Events")
            ax1.set_xticks(x)
            ax1.set_xticklabels(labels_m, rotation=45, ha="right", fontsize=8)
            ax1.set_ylabel("Leading Activities", color=DEEP_BLUE)
            ax2.set_ylabel("Events", color=RED)
            ax1.set_title("Leading Activities vs Events — Monthly Overlay",
                          fontsize=13, fontweight="bold", color=DEEP_BLUE)
            lines1, labs1 = ax1.get_legend_handles_labels()
            lines2, labs2 = ax2.get_legend_handles_labels()
            ax1.legend(lines1 + lines2, labs1 + labs2, loc="upper left", fontsize=9)
            ax1.spines["top"].set_visible(False)
            ax2.spines["top"].set_visible(False)
            p = os.path.join(output_dir, "ch_overlay.png")
            _save(fig, p)
            charts["overlay"] = p
    except Exception as e:
        log.warning("Chart overlay failed: %s", e)
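
    # Note on the overlay above: twinx() gives events their own y-scale, and each
    # Axes keeps its own legend handles, so the two handle lists are merged into
    # a single legend via get_legend_handles_labels().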

    # ── 6. Top LLC topics ─────────────────────────────────────────────────────
    try:
        top_topics = at_risk_res.get("top_llc_topics", {})
        if top_topics:
            items = sorted(top_topics.items(), key=lambda x: x[1])[-12:]
            labels_t = [i[0] for i in items]
            vals_t = [i[1] for i in items]
            fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4)))
            bars = ax.barh(labels_t, vals_t, color=DEEP_BLUE, alpha=0.85)
            for bar, val in zip(bars, vals_t):
                ax.text(val + 0.2, bar.get_y() + bar.get_height() / 2,
                        str(val), va="center", fontsize=9)
            ax.set_title("Top LLC Conversation Topics", fontsize=13,
                         fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Count")
            p = os.path.join(output_dir, "ch_llc_topics.png")
            _save(fig, p)
            charts["llc_topics"] = p
    except Exception as e:
        log.warning("Chart llc_topics failed: %s", e)

    # ── 7. At-risk theme ranking (horizontal bar) ─────────────────────────────
    try:
        combined = at_risk_res.get("combined_themes", {})
        if combined:
            items = sorted(combined.items(), key=lambda x: x[1])
            labels_r = [i[0] for i in items]
            vals_r = [i[1] for i in items]
            max_v = max(vals_r) if vals_r else 1
            colours_r = [
                RED if v >= max_v * 0.7
                else AMBER if v >= max_v * 0.4
                else DARK_GREEN
                for v in vals_r
            ]
            fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4)))
            ax.barh(labels_r, vals_r, color=colours_r, alpha=0.9)
            ax.set_title("At-Risk Behaviour Themes (Combined Sources)",
                         fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Theme frequency (weighted)")
            p = os.path.join(output_dir, "ch_at_risk_themes.png")
            _save(fig, p)
            charts["at_risk_themes"] = p
    except Exception as e:
        log.warning("Chart at_risk_themes failed: %s", e)

    # ── 8. Events by consequence ──────────────────────────────────────────────
    try:
        ev_df = events_res.get("_df")
        if ev_df is not None and "consequence" in ev_df:
            cons_counts = ev_df["consequence"].value_counts().reindex(
                CONSEQUENCE_ORDER, fill_value=0
            )
            cons_colors = [DARK_GREEN, AMBER, RED, PURPLE, PURPLE]
            fig, ax = plt.subplots(figsize=(7, 3.5))
            bars = ax.bar(cons_counts.index, cons_counts.values,
                          color=cons_colors[:len(cons_counts)], alpha=0.9)
            for bar, val in zip(bars, cons_counts.values):
                if val > 0:
                    ax.text(bar.get_x() + bar.get_width() / 2, val + 0.3,
                            str(val), ha="center", fontsize=10, fontweight="bold")
            ax.set_title("Events by Actual Consequence", fontsize=13,
                         fontweight="bold", color=DEEP_BLUE)
            ax.set_ylabel("Count")
            p = os.path.join(output_dir, "ch_consequence.png")
            _save(fig, p)
            charts["consequence"] = p
    except Exception as e:
        log.warning("Chart consequence failed: %s", e)

    # ── 9. Top leaders (activities) ───────────────────────────────────────────
    try:
        top_leaders = leading_res.get("top_leaders", {})
        if top_leaders:
            items = sorted(top_leaders.items(), key=lambda x: x[1])[-15:]
            fig, ax = plt.subplots(figsize=(8, max(4, len(items) * 0.4)))
            ax.barh([i[0] for i in items], [i[1] for i in items],
                    color=SKY_BLUE, alpha=0.9)
            ax.set_title("Top Leaders by LLC Activity Count",
                         fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("LLC Count")
            p = os.path.join(output_dir, "ch_top_leaders.png")
            _save(fig, p)
            charts["top_leaders"] = p
    except Exception as e:
        log.warning("Chart top_leaders failed: %s", e)

    # ── 10. CRP focus areas ───────────────────────────────────────────────────
    try:
        crp_focus = leading_res.get("crp_focus", {})
        if crp_focus:
            items = sorted(crp_focus.items(), key=lambda x: x[1])
            fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4)))
            ax.barh([i[0] for i in items], [i[1] for i in items],
                    color=MID_GREEN, alpha=0.9)
            ax.set_title("CRP Focus Areas in Leader Learning Conversations",
                         fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Count")
            p = os.path.join(output_dir, "ch_crp_focus.png")
            _save(fig, p)
            charts["crp_focus"] = p
    except Exception as e:
        log.warning("Chart crp_focus failed: %s", e)

    # ── 11. Two-year quality trend by activity type ──────────────────────────
    try:
        monthly_quality_rows = trends_res.get("monthly_quality", [])
        if monthly_quality_rows:
            qdf = pd.DataFrame(monthly_quality_rows)
            if not qdf.empty:
                periods = pd.PeriodIndex([pd.Period(m, freq="M") for m in qdf["period"]])
                x = range(len(periods))
                fig, ax = plt.subplots(figsize=(11, 4))
                for atype in LEADING_ACTIVITY_TYPES:
                    if atype in qdf.columns and qdf[atype].notna().any():
                        ax.plot(
                            x,
                            qdf[atype],
                            marker="o",
                            linewidth=2,
                            markersize=3.5,
                            label=atype,
                            color=ACTIVITY_COLOURS.get(atype, DEEP_BLUE),
                        )
                ax.set_xticks(x)
                ax.set_xticklabels(_month_labels(periods), rotation=45, ha="right", fontsize=8)
                ax.set_ylim(0, 100)
                ax.yaxis.set_major_locator(mticker.MultipleLocator(10))
                ax.set_ylabel("Average quality score")
                ax.set_title("Two-Year Quality Trend by Activity Type",
                             fontsize=13, fontweight="bold", color=DEEP_BLUE)
                ax.legend(fontsize=9, loc="upper left")
                p = os.path.join(output_dir, "ch_quality_trend.png")
                _save(fig, p)
                charts["quality_trend"] = p
    except Exception as e:
        log.warning("Chart quality_trend failed: %s", e)

    # ── 12. High-volume / low-value units ────────────────────────────────────
    try:
        hvlv = trends_res.get("high_volume_low_value", [])
        if hvlv:
            df_hv = pd.DataFrame(hvlv[:8]).sort_values("count", ascending=True)
            labels = [f"{r['business_unit']} ({r['activity_type']})" for _, r in df_hv.iterrows()]
            fig, ax = plt.subplots(figsize=(9, max(3.5, len(df_hv) * 0.5)))
            ax.barh(labels, df_hv["count"], color=AMBER, alpha=0.85)
            for idx, (_, row) in enumerate(df_hv.iterrows()):
                ax.text(row["count"] + 1, idx, f"{row['shallow_pct']:.0f}% shallow", va="center", fontsize=9)
            ax.set_title("High-Volume / Low-Value Activity Hotspots",
                         fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Activity count in two-year window")
            p = os.path.join(output_dir, "ch_low_value_units.png")
            _save(fig, p)
            charts["low_value_units"] = p
    except Exception as e:
        log.warning("Chart low_value_units failed: %s", e)

    # ── 13. Serious hotspot ranking ──────────────────────────────────────────
    try:
        serious_projects = events_res.get("serious_projects", {})
        serious_locations = events_res.get("serious_locations", {})
        rows = []
        for label, values in [("Project", serious_projects), ("Location", serious_locations)]:
            for name, count in list(values.items())[:5]:
                rows.append((f"{name} ({label})", int(count)))
        if rows:
            rows = sorted(rows, key=lambda x: x[1])[-10:]
            labels_h = [r[0] for r in rows]
            vals_h = [r[1] for r in rows]
            fig, ax = plt.subplots(figsize=(9, max(3.5, len(rows) * 0.45)))
            bars = ax.barh(labels_h, vals_h, color=RED, alpha=0.9)
            for bar, val in zip(bars, vals_h):
                ax.text(val + 0.1, bar.get_y() + bar.get_height() / 2, str(val), va="center", fontsize=9)
            ax.set_title("Serious Event Hotspots", fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Moderate / Major / Substantial events")
            p = os.path.join(output_dir, "ch_serious_hotspots.png")
            _save(fig, p)
            charts["serious_hotspots"] = p
    except Exception as e:
        log.warning("Chart serious_hotspots failed: %s", e)

    # ── 14. Project performance quadrant ─────────────────────────────────────
    try:
        proj_best = se_ev_res.get("project_comparison", {}).get("best", [])
        proj_watch = se_ev_res.get("project_comparison", {}).get("watch", [])
        project_rows = {}
        for row in proj_best + proj_watch:
            name = row.get("project")
            if name:
                project_rows[name] = row
        if project_rows:
            proj_df = pd.DataFrame(project_rows.values()).head(12)
            fig, ax = plt.subplots(figsize=(8.5, 6))
            x = proj_df["activities"].astype(float)
            y = proj_df["events"].astype(float)
            sizes = 80 + proj_df["serious_events"].astype(float) * 28
            ev_med = y.median()
            se_med = proj_df["serious_events"].median()
            # Green: at or below both medians; amber: elevated events only;
            # red: above the serious-event median.
            colors = [
                DARK_GREEN if (row["events"] <= ev_med and row["serious_events"] <= se_med)
                else AMBER if row["serious_events"] <= se_med
                else RED
                for _, row in proj_df.iterrows()
            ]
            ax.scatter(x, y, s=sizes, c=colors, alpha=0.75, edgecolors="white", linewidths=1.2)
            ax.axvline(x.median(), color=MUTED, linestyle="--", linewidth=1)
            ax.axhline(ev_med, color=MUTED, linestyle="--", linewidth=1)
            for _, row in proj_df.iterrows():
                ax.text(row["activities"] + 2, row["events"] + 0.2, str(row["project"])[:28], fontsize=8, color=DEEP_BLUE)
            ax.set_title("Project Performance Quadrant", fontsize=13, fontweight="bold", color=DEEP_BLUE)
            ax.set_xlabel("Leading activities")
            ax.set_ylabel("Events")
            p = os.path.join(output_dir, "ch_project_quadrant.png")
            _save(fig, p)
            charts["project_quadrant"] = p
    except Exception as e:
        log.warning("Chart project_quadrant failed: %s", e)

    # ── 15. Quality composition by activity type ─────────────────────────────
    try:
        qrows = trends_res.get("quality_by_type", [])
        if qrows:
            qdf = pd.DataFrame(qrows)
            if not qdf.empty:
                mixed = 100 - qdf["high_value_pct"] - qdf["meaningful_pct"] - qdf["shallow_pct"]
                fig, ax = plt.subplots(figsize=(8.5, 3.8))
                left = np.zeros(len(qdf))
                segments = [
                    ("Shallow", qdf["shallow_pct"].values, RED),
                    ("Mixed", mixed.values, AMBER),
                    ("Meaningful", qdf["meaningful_pct"].values, SKY_BLUE),
                    ("High value", qdf["high_value_pct"].values, DARK_GREEN),
                ]
                for label, vals, color in segments:
                    ax.barh(qdf["activity_type"], vals, left=left, color=color, label=label, alpha=0.9)
                    left += vals
                ax.set_xlim(0, 100)
                ax.xaxis.set_major_locator(mticker.MultipleLocator(20))
                ax.set_xlabel("Share of records")
                ax.set_title("Leading Activity Quality Mix", fontsize=13, fontweight="bold", color=DEEP_BLUE)
                ax.legend(loc="lower right", ncol=2, fontsize=8)
                p = os.path.join(output_dir, "ch_quality_mix.png")
                _save(fig, p)
                charts["quality_mix"] = p
    except Exception as e:
        log.warning("Chart quality_mix failed: %s", e)

    log.info("Generated %d charts in %s", len(charts), output_dir)
    return charts
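
# The returned mapping looks like (file names are fixed above; directories depend
# on output_dir):
#   {"events_monthly": ".../ch_events_monthly.png", "overlay": ".../ch_overlay.png", ...}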


# ─────────────────────────────────────────────────────────────────────────────
# Recommendation generation
# ─────────────────────────────────────────────────────────────────────────────


def _generate_recommendations(
    events_res: dict,
    leading_res: dict,
    effectiveness: dict,
    at_risk_res: dict,
    focus_areas: dict,
    trends_res: dict,
    se_events_rel: dict,
) -> list[str]:
    """Derive actionable recommendations from analysis results."""
    recs: list[str] = []

    # Serious events
    serious_pct = float(str(events_res.get("serious_pct", "0")).replace("%", "") or 0)
    if serious_pct >= 3:
        recs.append(
            f"Moderate-or-above consequence events make up {serious_pct:.1f}% of recorded events. "
            "Review whether controls around the highest-consequence scenarios are being verified often enough in field activity."
        )

    # LTI
    if events_res.get("lti_count", 0) > 0:
        recs.append(
            f"{events_res.get('lti_count', 0)} Lost Time Injuries were recorded in the analysis window. "
            "Review the underlying work types, contributing factors, and recovery actions for common patterns."
        )

    # Serious-event timing
    time_buckets = events_res.get("serious_time_buckets", {})
    if time_buckets:
        top_bucket = max(time_buckets, key=time_buckets.get)
        recs.append(
            f"Serious events are most frequently recorded in {top_bucket}. Use this to target pre-start, "
            "supervision, and fatigue controls at the riskiest parts of the day."
        )

    # Motor vehicle
    motor = events_res.get("motor_vehicle", {})
    if motor.get("count", 0) > 0:
        recs.append(
            f"Motor vehicle events account for {motor.get('pct_total', 0):.1f}% of all events. "
            "Review journey management, road conditions, and vehicle type patterns in the MVE section."
        )
    top_mv_project = next(iter(motor.get("top_projects", {}).items()), None)
    top_mv_road = next(iter(motor.get("road_types", {}).items()), None)
    if top_mv_project and top_mv_road:
        recs.append(
            f"Prioritise a motor vehicle risk review for {top_mv_project[0]} where MV exposure is most visible, "
            f"with particular attention to {top_mv_road[0]} driving conditions."
        )

    # Activity trend
    trend = leading_res.get("activity_trend", "")
    if "declining" in trend:
        recs.append(
            f"Leading activity volumes show a declining trend ({trend}). "
            "Leaders should re-engage with LLC, CCC, and OCC completion targets."
        )

    # Declining BUs
    for bu in focus_areas.get("declining_bus", []):
        recs.append(
            f"Business Unit '{bu}' shows declining leading-activity volume in the recent period. "
            "Targeted engagement from the sector SHEQ team is recommended."
        )

    # High activity but high events
    for bu in effectiveness.get("high_activity_high_events", []):
        recs.append(
            f"'{bu}' has both high leading-activity and high event volumes. "
            "This may indicate reactive activity patterns — review whether conversations "
            "are targeting root causes rather than responding after the fact."
        )

    project_watch = se_events_rel.get("project_comparison", {}).get("watch", [])
    if project_watch:
        top = project_watch[0]
        recs.append(
            f"Focus the next leadership review on project '{top.get('project')}', which recorded "
            f"{top.get('events', 0)} events and {top.get('serious_events', 0)} serious events against "
            f"{top.get('activities', 0)} leading activities."
        )

    location_watch = se_events_rel.get("location_comparison", {}).get("watch", [])
    if location_watch:
        top = location_watch[0]
        recs.append(
            f"Target field verification and local coaching at location '{top.get('location')}', where "
            f"{top.get('serious_events', 0)} serious events have been recorded and the activity-to-event ratio is "
            f"{top.get('activity_event_ratio', 'low')}."
        )

    project_best = se_events_rel.get("project_comparison", {}).get("best", [])
    if project_best:
        best = project_best[0]
        recs.append(
            f"Review what is working in project '{best.get('project')}', which shows comparatively strong leading-activity coverage "
            f"with {best.get('activities', 0)} activities and {best.get('events', 0)} events, and replicate the practice in weaker areas."
        )

    # Gap themes
    gap = at_risk_res.get("gap_themes", [])
    if gap:
        recs.append(
            "The following risk themes appear frequently in events but are under-represented "
            f"in LLC conversations: {', '.join(gap)}. "
            "Consider incorporating these topics into forthcoming LLC schedules."
        )

    for item in trends_res.get("high_volume_low_value", [])[:3]:
        recs.append(
            f"{item['business_unit']} has a high-volume / low-value {item['activity_type']} pattern "
            f"({item['count']} records, {item['shallow_pct']:.1f}% shallow). Sample the underlying entries "
            "with local leaders and tighten expectations for narrative quality and follow-up."
        )

    for item in trends_res.get("recommendations", [])[:2]:
        recs.append(item)

    input_depth = trends_res.get("input_depth", {})
    if input_depth.get("correlation") is not None and input_depth["correlation"] >= 0.4:
        recs.append(
            f"Input depth is moderately aligned with record quality (r = {input_depth['correlation']:.2f}). "
            "Track populated-field depth as a simple supporting KPI alongside the richer quality score."
        )

    ccc = trends_res.get("activity_insights", {}).get("CCC", {})
    if ccc:
        recs.append(
            f"Lift CCC quality expectations in priority areas: current CCC quality averages {ccc.get('avg_quality', 0):.1f}/100 with "
            f"{ccc.get('shallow_pct', 0):.1f}% shallow records and only {ccc.get('follow_up_pct', 0):.1f}% showing follow-up signals."
        )

    serious_projects = events_res.get("serious_projects", {})
    if serious_projects:
        top_project, top_count = next(iter(serious_projects.items()))
        recs.append(
            f"Escalate a focused action plan for project '{top_project}', which currently has the highest "
            f"serious-event burden ({top_count} serious events)."
        )

    # Default if nothing triggered
    if not recs:
        recs.append(
            "No significant adverse trends identified in the current period. "
            "Continue current leading activity cadence and monitor monthly."
        )

    # De-duplicate while preserving first-seen order
    deduped: list[str] = []
    seen: set[str] = set()
    for rec in recs:
        key = rec.strip()
        if key and key not in seen:
            seen.add(key)
            deduped.append(key)
    return deduped
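
# Dedup example (illustrative): if the project watch-list and the serious-project
# check happen to produce the same sentence, only the first occurrence survives,
# so ordering follows the priority of the checks above.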


# ─────────────────────────────────────────────────────────────────────────────
# Main orchestration
# ─────────────────────────────────────────────────────────────────────────────


def run_full_analysis(
    events: pd.DataFrame,
    safety_energy: pd.DataFrame,
    llc: pd.DataFrame,
    start_date: str,
    split_date: str,
    pd1_name: str,
    pd2_name: str,
    output_dir: str,
) -> AnalysisResults:
    """
    Run the complete SHEQ analysis pipeline across all three data sources.

    Parameters
    ----------
    events : normalised Events DataFrame (from data_loader)
    safety_energy : normalised Safety Energy DataFrame
    llc : normalised LLC Data DataFrame
    start_date : ISO date string — analysis window start
    split_date : retained for backwards compatibility; ignored by the sector-wide report
    pd1_name : retained for backwards compatibility; ignored by the sector-wide report
    pd2_name : retained for backwards compatibility; ignored by the sector-wide report
    output_dir : directory for chart images and output files

    Returns
    -------
    AnalysisResults dataclass
    """
    os.makedirs(output_dir, exist_ok=True)
    log.info("=== SHEQ Full Analysis ===")
    log.info(" start=%s output_dir=%s", start_date, output_dir)

    results = AnalysisResults()
    results.params = {
        "start_date": start_date,
        "output_dir": output_dir,
    }

    # 1. Data quality
    log.info("[1/7] Data quality profiling...")
    results.data_quality = _profile_data_quality(events, safety_energy, llc)

    # 2. Events analysis
    log.info("[2/7] Events analysis...")
    results.events_summary = _analyse_events(
        events, start_date, split_date, pd1_name, pd2_name
    )

    # 3. Leading activity analysis
    log.info("[3/7] Leading activity analysis...")
    results.leading_summary = _analyse_leading(safety_energy, llc, start_date)

    # 4. Effectiveness analysis
    log.info("[4/7] Effectiveness analysis...")
    results.effectiveness = _analyse_effectiveness(
        results.events_summary, results.leading_summary
    )

    # 5. At-risk behaviour analysis
    log.info("[5/7] At-risk behaviour extraction...")
    results.at_risk = _extract_at_risk_themes(events, safety_energy, llc, start_date)

    # 6. SE ↔ Events relationship
    log.info("[6/7] Safety Energy ↔ Events relationship...")
    results.se_events_rel = _analyse_se_events_relationship(
        events, safety_energy, start_date
    )

    # Rolling two-year Safety Energy trends and quality
    results.trends = _analyse_two_year_trends(
        safety_energy, llc, events, start_date
    )

    # 7. Focus areas
    results.focus_areas = _analyse_focus_areas(events, safety_energy, llc, start_date)

    # Charts
    log.info("[7/7] Generating charts...")
    results.charts = _generate_charts(
        results.events_summary,
        results.leading_summary,
        results.se_events_rel,
        results.at_risk,
        results.trends,
        output_dir,
        pd1_name,
        pd2_name,
        split_date,
    )

    # Recommendations and caveats
    results.recommendations = _generate_recommendations(
        results.events_summary,
        results.leading_summary,
        results.effectiveness,
        results.at_risk,
        results.focus_areas,
        results.trends,
        results.se_events_rel,
    )
    results.caveats = [
        "All analysis in this report is based on data exported from Ventia's safety management "
        "system. Data quality depends on the completeness and accuracy of field entries.",
        "Correlation and association findings do not imply causation. They are presented to "
        "guide further investigation, not to draw definitive conclusions.",
        "Activity counts reflect recorded activities only. Under-reporting in any area will "
        "affect the reliability of leading-indicator analysis.",
        "Theme extraction from free-text fields uses keyword matching and may miss nuance "
        "or misclassify entries. Manual review of flagged themes is recommended.",
        "Business unit comparisons may be affected by differences in headcount, contract scope, "
        "and operational complexity between units.",
    ]

    log.info("Analysis complete. %d charts, %d recommendations.",
             len(results.charts), len(results.recommendations))

    # Remove private DataFrames before returning (not needed by report_builder).
    # This must run after chart generation, which reads the "_df" frame.
    for key in ("_df", "_serious", "_motor"):
        results.events_summary.pop(key, None)
    for key in ("_se_f", "_llc_f"):
        results.leading_summary.pop(key, None)
    for key in ("_quality_df", "_llc_window"):
        results.trends.pop(key, None)

    return results
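

# Typical invocation (illustrative only; `load_all` is a hypothetical loader name,
# the real entry points live in data_loader):
#     events, safety_energy, llc = load_all("sheq_export.xlsx")
#     results = run_full_analysis(events, safety_energy, llc,
#                                 start_date="2024-01-01", split_date="2024-07-01",
#                                 pd1_name="", pd2_name="", output_dir="out")
#     print(len(results.charts), "charts;", len(results.recommendations), "recommendations")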