# report_builder.py

"""
report_builder.py — DOCX report generation for the SHEQ Analysis Tool.

Takes an AnalysisResults object from analysis_engine and produces a
fully-formatted DOCX report following the Ventia brand guidelines in
DESIGN.md.

Public API
----------
build_report(results: AnalysisResults, output_dir: str) -> str
    Returns the path to the generated .docx file.
"""

from __future__ import annotations

import logging
import os
from datetime import datetime
from typing import Any

import pandas as pd
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Cm, Inches, Pt, RGBColor

from analysis_engine import AnalysisResults
from config import (
    CONSEQUENCE_ORDER,
    DEEP_BLUE, SKY_BLUE, DARK_GREEN, MUTED,
    AMBER, RED,
)

log = logging.getLogger(__name__)

# ── Brand RGBColor objects ────────────────────────────────────────────────────
# Colours are spelled as six-digit hex strings (RRGGBB).
NAVY  = RGBColor.from_string("0B3254")  # Deep Blue
TEAL  = RGBColor.from_string("13B5EA")  # Sky Blue
GREEN = RGBColor.from_string("006E47")  # Dark Green
GREY  = RGBColor.from_string("64748B")  # Muted / slate grey
ALERT = RGBColor.from_string("DC2626")  # Red


# ─────────────────────────────────────────────────────────────────────────────
# Low-level DOCX helpers
# ─────────────────────────────────────────────────────────────────────────────

def _shading(cell, hex_color: str) -> None:
    """Fill a table cell's background with ``hex_color``.

    Builds a ``<w:shd>`` element (``w:val="clear"``, ``w:fill`` set to
    *hex_color*) and appends it to the cell's ``<w:tcPr>`` element, which is
    created first if the cell does not have one yet.
    """
    shd_xml = f'<w:shd {nsdecls("w")} w:fill="{hex_color}" w:val="clear"/>'
    shd_element = parse_xml(shd_xml)
    cell_props = cell._tc.get_or_add_tcPr()
    cell_props.append(shd_element)


def _run(para, text: str, bold: bool = False, size_pt: int = 11,
         colour: RGBColor = NAVY, italic: bool = False) -> None:
    """Append a run holding ``text`` to ``para`` in the brand typeface.

    ``colour`` may be an ``RGBColor`` or a hex string; a string has any
    ``#`` removed and is upper-cased before being parsed with
    ``RGBColor.from_string``. The run always uses "Source Sans Pro".
    """
    new_run = para.add_run(text)
    new_run.bold = bold
    new_run.italic = italic

    font = new_run.font
    font.size = Pt(size_pt)
    rgb = (
        RGBColor.from_string(colour.replace("#", "").upper())
        if isinstance(colour, str)
        else colour
    )
    font.color.rgb = rgb
    font.name = "Source Sans Pro"


def _heading(doc: Document, text: str, level: int) -> None:
    """Append a heading with ``text`` at the given heading ``level``."""
    doc.add_heading(text, level)


def _para(doc: Document, text: str = "", bold: bool = False,
          size_pt: int = 11, colour: RGBColor = NAVY) -> None:
    """Append a new paragraph to ``doc`` containing one styled run.

    The styling arguments are passed straight through to ``_run``.
    """
    paragraph = doc.add_paragraph()
    _run(paragraph, text, bold, size_pt, colour)


def _bullet(doc: Document, text: str, size_pt: int = 11) -> None:
    """Append a "List Bullet" paragraph holding ``text`` in navy."""
    bullet_para = doc.add_paragraph(style="List Bullet")
    _run(bullet_para, text, colour=NAVY, size_pt=size_pt)


def _callout(doc: Document, title: str, text: str,
             fill: str = "f0f5fa", accent: RGBColor = TEAL) -> None:
    """Append a shaded single-cell table used as a callout box.

    The cell is filled with ``fill``. Its first paragraph carries the bold
    ``title`` in the ``accent`` colour; a second paragraph carries ``text``
    at 10 pt in navy.
    """
    box = doc.add_table(rows=1, cols=1)
    box.alignment = WD_TABLE_ALIGNMENT.LEFT
    box.style = "Table Grid"

    body_cell = box.cell(0, 0)
    _shading(body_cell, fill)
    body_cell.text = ""

    # Title goes in the cell's first paragraph, body text in an added one.
    title_para = body_cell.paragraphs[0]
    _run(title_para, title, bold=True, size_pt=11, colour=accent)
    body_para = body_cell.add_paragraph()
    _run(body_para, text, size_pt=10, colour=NAVY)


def _metric_cards(doc: Document, cards: list[tuple[str, str]],
                  cols: int = 4, fill: str = "f0f5fa") -> None:
    """Lay out ``(title, value)`` cards in a grid table with ``cols`` columns.

    Nothing is added when ``cards`` is empty. Every cell of the grid is
    shaded, alternating ``fill`` and white by running cell position,
    including trailing cells that have no card. Each card shows its title
    (8 pt, grey) above its value (16 pt, bold, navy), both centred.
    """
    if not cards:
        return

    card_count = len(cards)
    n_rows = (card_count + cols - 1) // cols
    grid = doc.add_table(rows=n_rows, cols=cols)
    grid.alignment = WD_TABLE_ALIGNMENT.LEFT
    grid.style = "Table Grid"

    positions = ((r, c) for r in range(n_rows) for c in range(cols))
    for slot, (row_idx, col_idx) in enumerate(positions):
        cell = grid.cell(row_idx, col_idx)
        cell.text = ""
        _shading(cell, "ffffff" if slot % 2 else fill)
        if slot >= card_count:
            # Trailing cell with no card: shaded but left empty.
            continue

        label, figure = cards[slot]
        label_para = cell.paragraphs[0]
        _run(label_para, label, bold=False, size_pt=8, colour=GREY)
        label_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        figure_para = cell.add_paragraph()
        _run(figure_para, figure, bold=True, size_pt=16, colour=NAVY)
        figure_para.alignment = WD_ALIGN_PARAGRAPH.CENTER


def _add_table(
    doc: Document,
    headers: list[str],
    rows: list[list[str]],
    col_widths: list[float],   # inches
    header_fill: str = "0b3254",
    alt_fill: str = "f0f5fa",
) -> None:
    """Add a brand-styled table with a Deep Blue header row.

    Parameters
    ----------
    doc:
        Document the table is appended to.
    headers:
        Column titles; their count fixes the number of table columns.
    rows:
        Data rows. ``None`` values are rendered as an em dash. A row with
        fewer values than ``headers`` is padded with em dashes so the whole
        band is shaded; values beyond the last column are ignored rather
        than raising ``IndexError``.
    col_widths:
        Column widths in inches, applied to every row. Widths beyond the
        available columns are ignored.
    header_fill:
        Hex fill for the header row.
    alt_fill:
        Hex fill for even-indexed data rows; odd-indexed rows are white.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"

    # Resolve each row's cells exactly once. python-docx rebuilds the cell
    # sequence on every `.cells` access, so looking cells up per value makes
    # large tables needlessly slow.
    cells_by_row = [row.cells for row in table.rows]

    # Header row
    for cell, title in zip(cells_by_row[0], headers):
        cell.text = ""
        run = cell.paragraphs[0].add_run(title)
        run.bold = True
        run.font.size = Pt(9)
        run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        run.font.name = "Source Sans Pro"
        _shading(cell, header_fill)

    # Data rows
    for ri, (cells, values) in enumerate(zip(cells_by_row[1:], rows)):
        bg = alt_fill if ri % 2 == 0 else "ffffff"
        values = list(values)
        for ci, cell in enumerate(cells):
            # Missing trailing values are treated like None.
            val = values[ci] if ci < len(values) else None
            cell.text = ""
            run = cell.paragraphs[0].add_run(str(val) if val is not None else "—")
            run.font.size = Pt(9)
            run.font.name = "Source Sans Pro"
            _shading(cell, bg)

    # Column widths (set per cell, on every row)
    for cells in cells_by_row:
        for cell, width in zip(cells, col_widths):
            cell.width = Inches(width)


def _add_chart(doc: Document, charts: dict[str, str], key: str,
               width_in: float = 5.5) -> None:
    """Insert the chart image registered under ``key``, or a placeholder.

    ``charts`` maps chart keys to image paths. The picture is inserted at
    ``width_in`` inches wide when the path points to an existing regular
    file. Otherwise (missing key, empty path, missing file, or a path that
    is a directory) a small grey "[Chart '<key>' not available]" note is
    written instead.
    """
    path = charts.get(key)
    # isfile rather than exists: a directory path "exists" but cannot be
    # embedded as a picture, and should fall back to the placeholder.
    if path and os.path.isfile(path):
        doc.add_picture(path, width=Inches(width_in))
    else:
        _para(doc, f"[Chart '{key}' not available]", colour=GREY, size_pt=9)


def _spacer(doc: Document) -> None:
    """Append an empty paragraph to ``doc`` as vertical spacing."""
    doc.add_paragraph()


def _page_break(doc: Document) -> None:
    """Append a page break to ``doc``."""
    doc.add_page_break()


# ─────────────────────────────────────────────────────────────────────────────
# Document bootstrap
# ─────────────────────────────────────────────────────────────────────────────

def _bootstrap_styles(doc: Document) -> None:
    """Set brand fonts on built-in styles and margins on the first section.

    "Normal" becomes Source Sans Pro 11 pt. "Heading 1" to "Heading 3"
    become bold Source Sans Pro at 16/13/11 pt in navy/teal/navy. The first
    section's margins are set to 1.5 cm top, 1.4 cm bottom and 1.6 cm left
    and right.
    """
    styles = doc.styles

    body_font = styles["Normal"].font
    body_font.name = "Source Sans Pro"
    body_font.size = Pt(11)

    # heading level -> (point size, colour)
    heading_specs = {1: (16, NAVY), 2: (13, TEAL), 3: (11, NAVY)}
    for level, (size, colour) in heading_specs.items():
        heading_font = styles[f"Heading {level}"].font
        heading_font.name = "Source Sans Pro"
        heading_font.size = Pt(size)
        heading_font.color.rgb = colour
        heading_font.bold = True

    first_section = doc.sections[0]
    margins_cm = (
        ("top_margin", 1.5),
        ("bottom_margin", 1.4),
        ("left_margin", 1.6),
        ("right_margin", 1.6),
    )
    for attr, value in margins_cm:
        setattr(first_section, attr, Cm(value))


# ─────────────────────────────────────────────────────────────────────────────
# Title page
# ─────────────────────────────────────────────────────────────────────────────

def _title_page(doc: Document, results: AnalysisResults) -> None:
    """Render the title page: banner, scope callout, coverage cards, page break.

    Date coverage is read from ``results.data_quality`` under the ``events``
    and ``safety_energy`` entries; missing values are shown as "N/A". The
    "Generated" card uses the current local date.
    """
    quality = results.data_quality
    events_dq = quality.get("events", {})
    energy_dq = quality.get("safety_energy", {})

    ev_from = events_dq.get("date_from", "N/A")
    ev_to = events_dq.get("date_to", "N/A")
    se_from = energy_dq.get("date_from", "N/A")
    se_to = energy_dq.get("date_to", "N/A")

    # Full-width navy banner built from a single-cell table.
    banner = doc.add_table(rows=1, cols=1)
    banner.style = "Table Grid"
    banner_cell = banner.cell(0, 0)
    _shading(banner_cell, "0b3254")
    banner_cell.text = ""

    title_para = banner_cell.paragraphs[0]
    title_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _run(title_para, "SHEQ Safety Performance Report",
         bold=True, size_pt=26, colour=RGBColor(0xFF, 0xFF, 0xFF))
    subtitle_para = banner_cell.add_paragraph()
    _run(subtitle_para, "Safety Energy, event risk, and leadership focus areas",
         size_pt=13, colour=RGBColor(0xD7, 0xF2, 0xFF))

    _spacer(doc)
    scope_text = (
        f"Events window: {ev_from} – {ev_to}. Leading activity window: {se_from} – {se_to}. "
        "Built for senior leaders as a concise decision-support pack rather than a compliance summary."
    )
    _callout(doc, "Report Scope", scope_text, fill="eef6fb", accent=TEAL)
    _spacer(doc)

    coverage_cards = [
        ("Generated", datetime.now().strftime("%d %b %Y")),
        ("Events Coverage", f"{ev_from} to {ev_to}"),
        ("Safety Energy Coverage", f"{se_from} to {se_to}"),
        ("Audience", "Executive / Board"),
    ]
    _metric_cards(doc, coverage_cards, cols=2)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 1. Executive Summary
# ─────────────────────────────────────────────────────────────────────────────

def _section_executive_summary(doc: Document, results: AnalysisResults) -> None:
    """Render section 1: readout callout, headline cards, key points, charts, actions.

    Reads ``events_summary``, ``leading_summary``, ``trends``,
    ``recommendations`` and ``charts`` from ``results``. At most four
    executive-summary bullets and five recommendations are rendered. The
    section ends with a page break.
    """
    _heading(doc, "1. Executive Summary", 1)
    events = results.events_summary
    leading = results.leading_summary
    trends = results.trends
    recommendations = results.recommendations

    activity_totals = leading.get("totals", {})

    readout = (
        "This summary highlights where event risk is concentrated, where leading activities appear strongest or weakest, "
        "and what senior leaders should prioritise next."
    )
    _callout(doc, "Leadership Readout", readout, fill="eef6fb", accent=TEAL)
    _spacer(doc)

    motor_count = events.get("motor_vehicle", {}).get("count", 0)
    ccc_quality = trends.get("activity_insights", {}).get("CCC", {}).get("avg_quality", 0)
    headline_cards = [
        ("Total Events", str(events.get("total", 0))),
        ("Moderate+ Events", f"{events.get('serious_count', 0)}"),
        ("Events / Month", f"{events.get('events_per_month', 0):.1f}"),
        ("Motor Vehicle Events", str(motor_count)),
        ("LLCs", str(activity_totals.get("LLC", 0))),
        ("CCCs", str(activity_totals.get("CCC", 0))),
        ("OCCs", str(activity_totals.get("OCC", 0))),
        ("CCC Avg Quality", f"{ccc_quality:.1f}"),
    ]
    _metric_cards(doc, headline_cards, cols=4)
    _spacer(doc)

    _heading(doc, "1.1 What Leaders Should Know", 2)
    key_points = trends.get("executive_summary", [])[:4]
    for point in key_points:
        _bullet(doc, point)
    _spacer(doc)

    _heading(doc, "1.2 Board Visual Snapshot", 2)
    for chart_key in ("quality_mix", "project_quadrant"):
        _add_chart(doc, results.charts, chart_key, 6.0)
        _spacer(doc)

    _heading(doc, "1.3 Priority Actions", 2)
    for action in recommendations[:5]:
        _callout(doc, "Recommended Action", action, fill="f7fbff", accent=NAVY)
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 2. Data Quality
# ─────────────────────────────────────────────────────────────────────────────

def _section_data_quality(doc: Document, results: AnalysisResults) -> None:
    """Render section 2: one Field/Value table per data source.

    Each source (Events, Safety Energy, LLC Data) is read from
    ``results.data_quality``; fields that are absent are shown as "N/A".
    The Safety Energy source additionally gets an activity-type breakdown
    table, sorted by activity type, when ``type_breakdown`` is non-empty.
    The section ends with a page break.
    """
    _heading(doc, "2. Data Quality and Coverage", 1)
    quality = results.data_quality

    _para(doc,
          "This section summarises the completeness and date coverage of each data source. "
          "Any gaps identified here may affect the reliability of subsequent analysis sections.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # (heading, data_quality key, [(display label, field key), ...])
    source_specs = [
        ("Events", "events", [
            ("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
            ("Null — Event Type", "null_event_type"),
            ("Null — Consequence", "null_consequence"),
            ("Null — Business Unit", "null_business_unit"),
            ("Null — Root Cause", "null_root_cause"),
        ]),
        ("Safety Energy", "safety_energy", [
            ("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
            ("Null — Leader", "null_leader"),
            ("Null — Business Unit", "null_bu"),
        ]),
        ("LLC Data", "llc", [
            ("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
            ("Null — Topic", "null_topic"),
            ("Null — Leader", "null_leader"),
        ]),
    ]

    for label, source_key, fields in source_specs:
        data = quality.get(source_key, {})
        _heading(doc, label, 2)
        field_rows = [
            [display, str(data.get(field_key, "N/A"))]
            for display, field_key in fields
        ]
        _add_table(doc, ["Field", "Value"], field_rows, [3.0, 3.5])
        _spacer(doc)

        # Only Safety Energy carries an activity-type breakdown.
        if label != "Safety Energy":
            continue
        breakdown = data.get("type_breakdown", {})
        if not breakdown:
            continue
        breakdown_rows = [
            [activity, str(count)] for activity, count in sorted(breakdown.items())
        ]
        _add_table(doc, ["Activity Type", "Count"], breakdown_rows, [3.0, 3.5])
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 3. Events Analysis
# ─────────────────────────────────────────────────────────────────────────────

def _section_events(doc: Document, results: AnalysisResults) -> None:
    """Render section 3: event trends, breakdown tables, hotspots, timing, motor vehicle.

    Reads ``results.events_summary`` and ``results.charts``. Subsections
    3.1 to 3.3 are always rendered. Subsections 3.4 to 3.8 are rendered
    only when their source data is non-empty (3.8 only when the motor
    vehicle count is above zero). The section ends with a page break.
    """
    def _count_rows(counts):
        # Two-column rows: mapping key, stringified value.
        return [[name, str(value)] for name, value in counts.items()]

    _heading(doc, "3. Events Analysis", 1)
    events = results.events_summary
    charts = results.charts

    _callout(
        doc,
        "Event Story",
        "This section focuses on where event burden is building, where serious outcomes are concentrated, and what special risk signals are visible in timing and motor vehicle data.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # 3.1 and 3.2: chart-only subsections
    for title, chart_key, width in (
        ("3.1 Monthly Trend", "events_monthly", 6.0),
        ("3.2 Consequence Severity", "consequence", 5.5),
    ):
        _heading(doc, title, 2)
        _add_chart(doc, charts, chart_key, width)
        _spacer(doc)

    # 3.3: event types with share of total (denominator floored at 1)
    _heading(doc, "3.3 Event Type Breakdown", 2)
    type_counts = events.get("event_type_counts", {})
    denominator = max(events.get("total", 1), 1)
    type_rows = [
        [etype, str(n), f"{n/denominator*100:.1f}%"]
        for etype, n in type_counts.items()
    ]
    _add_table(doc, ["Event Type", "Count", "%"], type_rows, [3.0, 1.2, 1.0])
    _spacer(doc)

    # 3.4 and 3.5: simple count tables, skipped when empty
    for source_key, title, column in (
        ("crp_counts", "3.4 Critical Risk Protocols Involved", "CRP"),
        ("root_cause_counts", "3.5 Root Cause Categories", "Root Cause"),
    ):
        counts = events.get(source_key, {})
        if counts:
            _heading(doc, title, 2)
            _add_table(doc, [column, "Count"], _count_rows(counts), [4.0, 1.5])
            _spacer(doc)

    # 3.6: serious-event hotspots by project and location
    hot_projects = events.get("serious_projects", {})
    hot_locations = events.get("serious_locations", {})
    if hot_projects or hot_locations:
        _heading(doc, "3.6 Serious Event Hotspots", 2)
        _para(doc,
              "These are the projects and locations with the highest counts of moderate, major, or substantial events in the analysis window.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, charts, "serious_hotspots", 5.8)
        _spacer(doc)
        for column, counts in (("Project", hot_projects), ("Location", hot_locations)):
            if counts:
                _add_table(doc, [column, "Serious Events"], _count_rows(counts), [4.0, 1.5])
                _spacer(doc)

    # 3.7: time-of-day buckets for serious events
    buckets = events.get("serious_time_buckets", {})
    if buckets:
        _heading(doc, "3.7 Serious Event Timing", 2)
        _para(doc,
              f"Time-of-day information was available for {events.get('serious_time_coverage_pct', 0):.1f}% of serious events.",
              size_pt=11, colour=NAVY)
        _add_table(doc, ["Time of Day", "Serious Events"], _count_rows(buckets), [3.5, 1.5])
        _spacer(doc)

    # 3.8: motor vehicle events
    motor = events.get("motor_vehicle", {})
    if motor.get("count", 0) > 0:
        _heading(doc, "3.8 Motor Vehicle Events", 2)
        metric_rows = [
            ["Total Motor Vehicle Events", str(motor.get("count", 0))],
            ["Share of All Events", f"{motor.get('pct_total', 0):.1f}%"],
            ["Moderate+ MV Events", str(motor.get("serious_count", 0))],
            ["Serious Rate Within MV Events", f"{motor.get('serious_pct_within_mve', 0):.1f}%"],
        ]
        _add_table(doc, ["Metric", "Value"], metric_rows, [3.5, 2.0])
        _spacer(doc)

        if motor.get("top_projects"):
            project_rows = _count_rows(motor.get("top_projects", {}))
            _add_table(doc, ["Project", "MV Events"], project_rows, [4.0, 1.5])
            _spacer(doc)

        # First four entries of each pattern category, in mapping order.
        pattern_rows = [
            [category, pattern, str(n)]
            for category, source_key in (
                ("Road Type", "road_types"),
                ("Road Condition", "conditions"),
                ("Vehicle Type", "vehicle_types"),
            )
            for pattern, n in list(motor.get(source_key, {}).items())[:4]
        ]
        if pattern_rows:
            _add_table(doc, ["Category", "Pattern", "Count"], pattern_rows, [1.5, 3.0, 1.0])
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 4. Leading Activity Overview
# ─────────────────────────────────────────────────────────────────────────────

def _section_leading_overview(doc: Document, results: AnalysisResults) -> None:
    """Render section 4: Safety Energy leading-activity overview.

    Reads ``results.leading_summary``, ``results.trends`` and
    ``results.charts``. Subsections 4.1 to 4.7 and 4.9 always get a
    heading; most tables under them are rendered only when their source
    data is non-empty. Subsection 4.8 is rendered only when an overall
    input-depth correlation is present. The section ends with a page break.

    The 4.9 heading names the activity type "OCC", matching the type key
    used everywhere else in the report (it previously read "OCCC").
    """
    _heading(doc, "4. Safety Energy — Leading Activity Overview", 1)
    lead = results.leading_summary
    trends = results.trends
    chts = results.charts

    _callout(
        doc,
        "Leading Activity Readout",
        "Safety Energy combines LLC, CCC, and OCC activity. The deeper two-year review is designed to show not just volume, "
        "but whether the records look rich, preventive, and useful for leadership learning.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Activity mix donut
    _heading(doc, "4.1 Activity Type Mix", 2)
    _add_chart(doc, chts, "activity_mix", 4.5)
    _spacer(doc)

    # Totals table: one row per activity type with a non-zero total
    totals = lead.get("totals", {})
    avg_at_risk = lead.get("avg_at_risk", {})
    if totals:
        rows = [
            [atype,
             str(totals.get(atype, 0)),
             f"{avg_at_risk.get(atype, 0):.2f}"]
            for atype in ["LLC", "CCC", "OCC"]
            if totals.get(atype, 0) > 0
        ]
        _add_table(
            doc,
            ["Activity Type", "Total Count", "Avg At-Risk Aspects per Activity"],
            rows,
            [2.5, 1.5, 3.0],
        )
        _spacer(doc)

    # Monthly trend chart
    _heading(doc, "4.2 Monthly Activity Trend", 2)
    _para(doc, f"Overall trend: {lead.get('activity_trend', 'N/A')}", size_pt=11, colour=GREY)
    _add_chart(doc, chts, "leading_monthly", 6.0)
    _spacer(doc)

    # BU breakdown: union of business units seen under any activity type
    _heading(doc, "4.3 Activity by Business Unit", 2)
    bu_by_type = lead.get("bu_by_type", {})
    all_bus: list[str] = sorted(set(
        bu for d in bu_by_type.values() for bu in d.keys()
    ))
    if all_bus:
        rows = [
            [bu] + [str(bu_by_type.get(at, {}).get(bu, 0)) for at in ["LLC", "CCC", "OCC"]]
            for bu in all_bus
        ]
        _add_table(doc,
                   ["Business Unit", "LLC", "CCC", "OCC"],
                   rows,
                   [2.8, 1.0, 1.0, 1.0])
        _spacer(doc)

    # LLC topic breakdown (from LLC_Data)
    _heading(doc, "4.4 LLC Conversation Topics", 2)
    _para(doc,
          "The following topics were most frequently recorded in Leader Learning Conversations. "
          "Topic coverage indicates where leaders are directing their field conversations.",
          size_pt=11, colour=NAVY)
    _add_chart(doc, chts, "llc_topics", 5.5)
    _spacer(doc)

    # Table is capped at the first 12 topics, in mapping order
    top_topics = lead.get("top_topics", {})
    if top_topics:
        rows = [[k, str(v)] for k, v in list(top_topics.items())[:12]]
        _add_table(doc, ["Topic", "Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # CRP focus
    _heading(doc, "4.5 CRP Focus Areas in LLCs", 2)
    _add_chart(doc, chts, "crp_focus", 5.5)
    _spacer(doc)

    # Top leaders
    _heading(doc, "4.6 Most Active Leaders (LLC)", 2)
    _add_chart(doc, chts, "top_leaders", 5.5)
    _spacer(doc)

    # Table is capped at the first 12 leaders, in mapping order
    top_leaders = lead.get("top_leaders", {})
    if top_leaders:
        rows = [[l, str(c)] for l, c in list(top_leaders.items())[:12]]
        _add_table(doc, ["Leader", "LLC Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # Two-year trend and quality view
    _heading(doc, "4.7 Rolling Two-Year Trend and Quality View", 2)
    _para(doc,
          f"This view uses a rolling two-year Safety Energy window from "
          f"{trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}. "
          "It is designed to answer not just whether activities were completed, but whether "
          "the records suggest meaningful learning, risk recognition, and follow-up quality.",
          size_pt=11, colour=NAVY)
    _para(doc, trends.get("proxy_note", ""), size_pt=10, colour=GREY)
    _add_chart(doc, chts, "quality_trend", 6.0)
    _spacer(doc)
    _add_chart(doc, chts, "quality_mix", 6.0)
    _spacer(doc)

    # Per-activity-type quality metrics
    quality_rows = trends.get("quality_by_type", [])
    if quality_rows:
        rows = [
            [
                r.get("activity_type", ""),
                str(r.get("count", 0)),
                f"{r.get('avg_quality', 0):.1f}",
                f"{r.get('avg_input_depth', 0):.1f}",
                f"{r.get('meaningful_pct', 0):.1f}%",
                f"{r.get('high_value_pct', 0):.1f}%",
                f"{r.get('shallow_pct', 0):.1f}%",
                f"{r.get('follow_up_pct', 0):.1f}%",
            ]
            for r in quality_rows
        ]
        _add_table(
            doc,
            ["Type", "Count", "Avg Quality", "Avg Input Depth", "Meaningful", "High Value", "Shallow", "Follow-up"],
            rows,
            [0.8, 0.8, 0.9, 1.0, 0.9, 0.9, 0.9, 0.9],
        )
        _spacer(doc)

    # 4.8 is only rendered when an overall correlation value is present
    input_depth = trends.get("input_depth", {})
    if input_depth.get("correlation") is not None:
        _heading(doc, "4.8 Input Depth as a Supporting Quality Metric", 2)
        _para(doc,
              f"Across the two-year Safety Energy window, input depth and quality score are correlated at r = {input_depth.get('correlation'):.2f}. "
              f"{input_depth.get('note', '')}",
              size_pt=11, colour=NAVY)
        by_band = input_depth.get("by_band", [])
        if by_band:
            rows = [
                [
                    r.get("band", ""),
                    str(r.get("count", 0)),
                    f"{r.get('avg_input_depth', 0):.1f}",
                    f"{r.get('avg_quality', 0):.1f}",
                    f"{r.get('meaningful_pct', 0):.1f}%",
                    f"{r.get('high_value_pct', 0):.1f}%",
                    f"{r.get('shallow_pct', 0):.1f}%",
                ]
                for r in by_band
            ]
            _add_table(
                doc,
                ["Band", "Count", "Avg Input Depth", "Avg Quality", "Meaningful", "High Value", "Shallow"],
                rows,
                [0.9, 0.8, 1.0, 0.9, 0.9, 0.9, 0.9],
            )
            _spacer(doc)

    # First five recurring themes, in mapping order
    top_themes = trends.get("top_themes", {})
    if top_themes:
        _para(doc, "Most common recurring themes in Safety Energy narratives:", bold=True, size_pt=11, colour=NAVY)
        for theme, count in list(top_themes.items())[:5]:
            _bullet(doc, f"{theme}: {count} mentions")
        _spacer(doc)

    # Heading uses "OCC" so it agrees with the per-type paragraphs below
    _heading(doc, "4.9 CCC / OCC / LLC Value Signals", 2)
    activity_insights = trends.get("activity_insights", {})
    for atype in ["CCC", "OCC", "LLC"]:
        insight = activity_insights.get(atype, {})
        if not insight:
            # No insight data for this activity type: nothing to print
            continue
        _para(
            doc,
            f"{atype}: average quality {insight.get('avg_quality', 0):.1f}/100, "
            f"average input depth {insight.get('avg_input_depth', 0):.1f}/100, "
            f"{insight.get('preventive_pct', 0):.1f}% preventive signal, "
            f"{insight.get('reactive_pct', 0):.1f}% reactive signal, "
            f"{insight.get('repetitive_pct', 0):.1f}% repetitive signal, "
            f"{insight.get('shallow_pct', 0):.1f}% shallow.",
            size_pt=11,
            colour=NAVY,
        )
        depth = insight.get("input_depth", {})
        if depth.get("correlation") is not None:
            _para(doc,
                  f"For {atype}, input depth vs quality correlation is r = {depth.get('correlation'):.2f}.",
                  size_pt=10, colour=GREY)
        top_modules = insight.get("top_modules", {})
        if top_modules:
            _para(doc, f"Top {atype} focus areas:", bold=True, size_pt=10, colour=GREY)
            for label, count in list(top_modules.items())[:4]:
                _bullet(doc, f"{label}: {count}")
    _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 5. Effectiveness of Leading Activities
# ─────────────────────────────────────────────────────────────────────────────

def _section_effectiveness(doc: Document, results: AnalysisResults) -> None:
    """Render section 5: leading activities compared against event outcomes.

    Reads ``results.effectiveness`` and ``results.charts``. Subsections 5.1
    to 5.3 are always rendered. The business-unit table and the two pattern
    subsections (5.4, 5.5) are rendered only when their source lists are
    non-empty. A closing note paragraph and a page break end the section.
    """
    _heading(doc, "5. Effectiveness of Leading Activities", 1)
    effectiveness = results.effectiveness
    charts = results.charts

    _para(doc,
          "This section examines whether leading activity patterns appear associated with "
          "event outcomes at a business unit and portfolio level. All findings are associative "
          "only — correlation does not imply causation.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # 5.1: monthly overlay chart
    _heading(doc, "5.1 Monthly Activities vs Events Overlay", 2)
    _add_chart(doc, charts, "overlay", 6.0)
    _spacer(doc)

    # 5.2: correlation note
    _heading(doc, "5.2 Statistical Association", 2)
    _para(doc, effectiveness.get("corr_note", "N/A"), size_pt=11, colour=NAVY)
    _spacer(doc)

    # 5.3: business-unit comparison chart and table
    _heading(doc, "5.3 Activities vs Events by Business Unit", 2)
    _add_chart(doc, charts, "bu_comparison", 5.5)
    _spacer(doc)

    bu_records = effectiveness.get("bu_table", [])
    if bu_records:
        table_rows = []
        for record in bu_records:
            table_rows.append([
                record.get("business_unit", ""),
                str(record.get("activities", 0)),
                str(record.get("events", 0)),
            ])
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   table_rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # 5.4 and 5.5: pattern commentary, each skipped when its list is empty
    patterns = [
        (
            effectiveness.get("high_activity_high_events", []),
            "5.4 High Activity / High Events — Pattern of Interest",
            "The following Business Units recorded both above-median leading-activity volumes "
            "and above-median event counts. This may indicate reactive activity patterns where "
            "engagement is increasing in response to events rather than preventing them. "
            "Further investigation is recommended.",
        ),
        (
            effectiveness.get("high_activity_low_events", []),
            "5.5 High Activity / Low Events — Positive Signal",
            "The following Business Units recorded above-median leading-activity volumes "
            "and below-median event counts. This pattern is consistent with leading activities "
            "having a preventive effect, though this cannot be confirmed from available data.",
        ),
    ]
    for units, title, commentary in patterns:
        if not units:
            continue
        _heading(doc, title, 2)
        _para(doc, commentary, size_pt=11, colour=NAVY)
        for unit in units:
            _bullet(doc, unit)
        _spacer(doc)

    _para(doc, effectiveness.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 6. At-Risk Behaviours
# ─────────────────────────────────────────────────────────────────────────────

def _section_at_risk(doc: Document, results: AnalysisResults) -> None:
    """Render section 6: at-risk behaviour themes.

    Reads ``results.at_risk`` and ``results.charts``. The combined-theme
    table, the LLC-vs-event comparison table and the alignment-gap
    subsection (6.3) are each rendered only when their source data is
    non-empty. A closing note paragraph and a page break end the section.
    """
    _heading(doc, "6. At-Risk Behaviours", 1)
    at_risk = results.at_risk
    charts = results.charts

    _para(doc,
          "At-risk behaviour themes are identified by analysing free-text fields across all "
          "three data sources (Events descriptions, LLC conversation notes, and Safety Energy "
          "observations) using keyword matching against known risk categories.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # 6.1: combined theme chart and table
    _heading(doc, "6.1 Combined Theme Frequency", 2)
    _add_chart(doc, charts, "at_risk_themes", 5.5)
    _spacer(doc)

    combined = at_risk.get("combined_themes", {})
    if combined:
        combined_rows = [[theme, str(weight)] for theme, weight in combined.items()]
        _add_table(doc, ["Risk Theme", "Weighted Frequency"], combined_rows, [3.5, 2.0])
        _spacer(doc)

    # 6.2: LLC mentions against event mentions, one row per theme in either
    _heading(doc, "6.2 LLC Conversation Topics vs Event Themes", 2)
    llc_counts = at_risk.get("llc_themes", {})
    event_counts = at_risk.get("event_themes", {})

    if llc_counts or event_counts:
        theme_names = sorted(set(llc_counts) | set(event_counts))
        comparison_rows = []
        for theme in theme_names:
            comparison_rows.append([
                theme,
                str(llc_counts.get(theme, 0)),
                str(event_counts.get(theme, 0)),
            ])
        _add_table(doc,
                   ["Risk Theme", "LLC Mentions", "Event Mentions"],
                   comparison_rows,
                   [3.0, 1.5, 1.5])
        _spacer(doc)

    # 6.3: alignment gaps, skipped when there are none
    gap_themes = at_risk.get("gap_themes", [])
    if gap_themes:
        _heading(doc, "6.3 Topic Alignment Gaps", 2)
        _para(doc,
              "The following risk themes appear among the top event themes but are under-represented "
              "in LLC conversation topics. This may indicate a gap between where safety conversations "
              "are focused and where actual events are occurring.",
              size_pt=11, colour=NAVY)
        for gap_theme in gap_themes:
            _bullet(doc, gap_theme)
        _spacer(doc)

    _para(doc, at_risk.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 7. Safety Energy ↔ Events Relationship
# ─────────────────────────────────────────────────────────────────────────────

def _se_count_text(value: Any) -> str:
    """Format a count cell as a whole-number string.

    ``int()`` raises on ``None``, NaN, infinities and non-numeric text; those
    values are rendered as "0", the same value the callers use as the default
    for a missing key.
    """
    try:
        return str(int(value))
    except (TypeError, ValueError, OverflowError):
        return "0"


def _se_ratio_text(record: dict, key: str) -> str:
    """Return ``record[key]`` as text, or an em dash when missing or None."""
    value = record.get(key)
    return "—" if value is None else str(value)


def _se_signal_rows(records: list, name_key: str) -> list[list[str]]:
    """Build table rows (name, activities, events, serious, ratio) from records."""
    return [
        [
            str(r.get(name_key, "")),
            str(r.get("activities", 0)),
            str(r.get("events", 0)),
            str(r.get("serious_events", 0)),
            _se_ratio_text(r, "activity_event_ratio"),
        ]
        for r in records
    ]


def _se_signal_tables(doc: Document, comparison: dict, name_key: str,
                      column_title: str, watch_intro: str,
                      watch_limit: int = 6) -> None:
    """Emit the "best" table and the capped "watch" table for one comparison."""
    def _emit(records: list) -> None:
        # Fresh header/width lists per call, exactly as the literal arguments
        # did before, in case _add_table keeps or mutates them.
        _add_table(
            doc,
            [column_title, "Activities", "Events", "Serious", "Activity:Event"],
            _se_signal_rows(records, name_key),
            [2.8, 1.0, 0.9, 0.9, 1.3],
        )
        _spacer(doc)

    best = comparison.get("best")
    if best:
        _emit(best)

    watch = comparison.get("watch")
    if watch:
        _para(doc, watch_intro, bold=True, size_pt=11, colour=NAVY)
        _emit(watch[:watch_limit])


def _section_se_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 7: relationship between Safety Energy activity and events.

    Reads ``results.se_events_rel`` (a mapping) and ``results.charts`` and
    appends, in order: an introductory callout, the business-unit comparison
    table (7.1), project signals with the "project_quadrant" chart (7.2),
    location signals (7.3), event spike periods (7.4), the topic alignment
    observation (7.5), a closing note and a page break.

    Sub-sections 7.2-7.4 are only written when their data is present; the
    7.1 and 7.5 headings are always written. Keys that are absent or hold
    ``None`` are treated as empty.

    Parameters
    ----------
    doc     : document being built; content is appended in place
    results : analysis output providing ``se_events_rel`` and ``charts``
    """
    _heading(doc, "7. Relationship Between Safety Energy and Events", 1)
    rel  = results.se_events_rel
    chts = results.charts

    _callout(
        doc,
        "Coverage vs Outcome",
        "This section links leading activity coverage to event burden so leaders can see which business units, projects, and locations appear strongest, and which need direct intervention.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # BU comparison table
    _heading(doc, "7.1 Business Unit Comparison", 2)
    # "or []" also covers a key that is present but set to None.
    bu_comp = rel.get("bu_comparison") or []
    if bu_comp:
        rows = [
            [
                str(r.get("business_unit", "")),
                _se_count_text(r.get("activities", 0)),
                _se_count_text(r.get("events", 0)),
                _se_ratio_text(r, "ratio"),
            ]
            for r in bu_comp
        ]
        _add_table(doc,
                   ["Business Unit", "Activities", "Events", "Activity:Event Ratio"],
                   rows,
                   [2.5, 1.2, 1.0, 2.0])
        _spacer(doc)

    project_comp = rel.get("project_comparison") or {}
    if project_comp.get("best") or project_comp.get("watch"):
        _heading(doc, "7.2 Project Performance Signals", 2)
        _para(doc,
              "These project comparisons use recorded Safety Energy activity against recorded events. They are intended as directional signals only, not league tables.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chts, "project_quadrant", 5.8)
        _spacer(doc)
        _se_signal_tables(
            doc, project_comp, "project", "Project",
            "Projects needing leadership attention:",
        )

    location_comp = rel.get("location_comparison") or {}
    if location_comp.get("best") or location_comp.get("watch"):
        _heading(doc, "7.3 Location Performance Signals", 2)
        _se_signal_tables(
            doc, location_comp, "location", "Location",
            "Locations needing leadership attention:",
        )

    # Spike months
    spikes = rel.get("spike_months") or []
    if spikes:
        _heading(doc, "7.4 Event Spike Periods", 2)
        _para(doc,
              "The following periods recorded above-average event counts coinciding with "
              "below-average leading-activity volumes. These periods may warrant retrospective "
              "review.",
              size_pt=11, colour=NAVY)
        for m in spikes:
            # Coerce to text like the other bullet loops in this section.
            _bullet(doc, str(m))
        _spacer(doc)

    # Topic alignment note
    _heading(doc, "7.5 Topic Alignment Observation", 2)
    _para(doc, rel.get("alignment_note", ""), size_pt=11, colour=NAVY)

    llc_top = rel.get("llc_top_topics") or []
    ev_top  = rel.get("ev_top_rc") or []
    if llc_top:
        _para(doc, "Top LLC topics:", bold=True, size_pt=11, colour=NAVY)
        for t in llc_top:
            _bullet(doc, str(t))
    if ev_top:
        _para(doc, "Top event root causes:", bold=True, size_pt=11, colour=NAVY)
        for t in ev_top:
            _bullet(doc, str(t))
    _spacer(doc)

    _para(doc, rel.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 8. Leader Focus Areas
# ─────────────────────────────────────────────────────────────────────────────

def _fa_one_decimal(value: Any) -> str:
    """Format *value* with one decimal place, falling back to "0.0".

    A plain ``f"{value:.1f}"`` raises when the value is ``None`` or text;
    those cases are rendered as "0.0", matching the default of 0 that the
    caller uses for a missing key.
    """
    try:
        return f"{float(value):.1f}"
    except (TypeError, ValueError):
        return "0.0"


def _section_focus_areas(doc: Document, results: AnalysisResults) -> None:
    """Write section 8: leader focus areas.

    Reads ``results.focus_areas``, ``results.trends`` and ``results.charts``
    and appends, in order: an introduction, the business-unit summary table
    (8.1), declining units (8.2), up to five leadership watchouts (8.3), the
    "low_value_units" chart with a table of up to eight hotspots (8.4), and a
    page break. The 8.1 heading is always written; its table and sub-sections
    8.2-8.4 are only written when their data is present. Keys that are absent
    or hold ``None`` are treated as empty.

    Parameters
    ----------
    doc     : document being built; content is appended in place
    results : analysis output providing ``focus_areas``, ``trends``, ``charts``
    """
    _heading(doc, "8. Leader Focus Areas", 1)
    fa   = results.focus_areas
    trends = results.trends
    chts = results.charts

    _para(doc,
          "This section identifies Business Units and leaders that warrant specific leadership "
          "attention based on activity volumes, event rates, and observed trends.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # BU summary table
    _heading(doc, "8.1 Business Unit Activity and Event Summary", 2)
    # "or []" also covers a key that is present but set to None.
    bu_summary = fa.get("bu_summary") or []
    if bu_summary:
        rows = [
            [str(r.get("business_unit", "")),
             str(r.get("activities", 0)),
             str(r.get("events", 0))]
            for r in bu_summary
        ]
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # Declining BUs
    declining = fa.get("declining_bus") or []
    if declining:
        _heading(doc, "8.2 Declining Activity Units", 2)
        _para(doc,
              "The following Business Units recorded significantly lower leading-activity "
              "volumes in the second half of the analysis period compared to the first half. "
              "Leaders in these units should be engaged to understand and address the decline.",
              size_pt=11, colour=NAVY)
        for bu in declining:
            _bullet(doc, str(bu))
        _spacer(doc)

    leadership_focus = trends.get("leadership_focus") or []
    if leadership_focus:
        _heading(doc, "8.3 Leadership Watchouts from Two-Year Safety Energy Trends", 2)
        for item in leadership_focus[:5]:
            _bullet(doc, str(item))
        _spacer(doc)

    low_value_units = trends.get("high_volume_low_value") or []
    if low_value_units:
        _heading(doc, "8.4 High-Volume / Low-Value Hotspots", 2)
        _para(doc,
              "These areas are recording substantial activity volume, but the record quality signals "
              "suggest the activity may be drifting toward compliance-only completion rather than strong learning.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chts, "low_value_units", 5.8)
        _spacer(doc)
        rows = [
            [
                # Coerced to text like every other cell in the row.
                str(r.get("activity_type", "")),
                str(r.get("business_unit", "")),
                str(r.get("count", 0)),
                _fa_one_decimal(r.get("avg_quality", 0)),
                f"{_fa_one_decimal(r.get('shallow_pct', 0))}%",
            ]
            for r in low_value_units[:8]
        ]
        _add_table(doc,
                   ["Type", "Business Unit", "Count", "Avg Quality", "Shallow"],
                   rows,
                   [0.9, 2.8, 0.9, 1.0, 1.0])
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 9. Recommended Actions
# ─────────────────────────────────────────────────────────────────────────────

def _section_recommendations(doc: Document, results: AnalysisResults) -> None:
    """Write section 9: recommended actions.

    Appends an introductory callout, then one callout per recommendation for
    at most the first ten entries of ``results.recommendations``. The first
    three are labelled "Immediate Priority" (ALERT accent, tinted fill); the
    rest are labelled "Next Priority" (NAVY accent, white fill). A fixed list
    of ongoing-monitoring bullets and a page break follow.

    Parameters
    ----------
    doc     : document being built; content is appended in place
    results : analysis output providing ``recommendations``
    """
    monitoring_points = (
        "Track leading-activity volumes monthly by Business Unit against a set target (e.g. "
        "minimum 4 LLCs per leader per month).",
        "Track CCC/OCC/LLC quality monthly using shallow-entry rate, follow-up rate, and average quality score.",
        "Review LLC topic coverage quarterly to ensure alignment with top event root causes.",
        "Re-run this full report monthly or quarterly as new data becomes available.",
        "Supplement quantitative analysis with qualitative review of LLC content quality.",
        "Use the Business Unit activity-to-event ratio table to guide where SHEQ advisor "
        "engagement should be prioritised.",
    )

    _heading(doc, "9. Recommended Actions", 1)
    _callout(
        doc,
        "Action Agenda",
        "These actions are generated directly from the event hotspots, leading-activity quality signals, and project/location performance patterns in the report.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    for rank, recommendation in enumerate(results.recommendations[:10], start=1):
        # The top three positions get the highlighted treatment.
        if rank <= 3:
            label, background, edge = "Immediate", "f7fbff", ALERT
        else:
            label, background, edge = "Next", "ffffff", NAVY
        _callout(doc, f"{label} Priority", recommendation, fill=background, accent=edge)
        _spacer(doc)

    _spacer(doc)
    _heading(doc, "Ongoing Monitoring Recommendations", 2)
    for point in monitoring_points:
        _bullet(doc, point)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 10. Methodology and Caveats
# ─────────────────────────────────────────────────────────────────────────────

def _section_methodology(doc: Document, results: AnalysisResults) -> None:
    """Write section 10: methodology and caveats.

    Appends the data-source table (10.1), activity type definitions (10.2),
    the analytical approach bullets (10.3) and one bullet per entry of
    ``results.caveats`` (10.4), followed by a "Report generated" line stamped
    with the current local time. The rolling-window bullet in 10.3 reads
    ``window_months``, ``window_start`` and ``window_end`` from
    ``results.trends``, defaulting to 24 / "N/A" / "N/A".

    Parameters
    ----------
    doc     : document being built; content is appended in place
    results : analysis output providing ``trends`` and ``caveats``
    """
    def _bullets(items) -> None:
        # One bullet per item, in iteration order.
        for entry in items:
            _bullet(doc, entry)

    _heading(doc, "10. Methodology and Caveats", 1)
    trends = results.trends

    # 10.1: static description of the three input workbooks
    _heading(doc, "10.1 Data Sources", 2)
    events_source = [
        "Events.xlsx",
        "Incident and event records exported from the Ventia safety management system. "
        "Covers all event types including injuries, motor vehicle events, close calls, "
        "environmental events, and nonconformances.",
    ]
    safety_energy_source = [
        "Safety_Energy.xlsx",
        "Combined leading activity export covering all three activity types: Leader Learning "
        "Conversations (LLC), Critical Control Checks (CCC), and Operational Control Checks (OCC). "
        "This is treated as the primary leading indicator data source.",
    ]
    llc_source = [
        "LLC_Data.xlsx",
        "Supplementary LLC export providing richer free-text data (conversation topics, CRP focus, "
        "at-risk observations). Used primarily for theme and topic analysis. Record counts are "
        "closely aligned with the LLC records in Safety_Energy.xlsx.",
    ]
    _add_table(
        doc,
        ["Source", "Description"],
        [events_source, safety_energy_source, llc_source],
        [2.0, 4.5],
    )
    _spacer(doc)

    # 10.2: activity type glossary
    _heading(doc, "10.2 Activity Type Definitions", 2)
    _para(doc,
          "Safety Energy is the combined analytical domain. It encompasses three activity types:",
          size_pt=11, colour=NAVY)
    _bullets((
        "LLC (Leader Learning Conversation): A structured conversation between a leader and "
        "a worker or work group, focused on safety topics, risk identification, and critical "
        "controls.",
        "CCC (Critical Control Check): A field verification that critical controls for high-risk "
        "activities are in place and effective (e.g. working at height, hazardous energies).",
        "OCC (Operational Control Check): A broader operational inspection or check covering "
        "a range of work-area risk topics.",
        "Note: In some legacy documentation or older exports, the label 'OCC' was used broadly "
        "to cover what is now split into CCC and OCC. The current Safety_Energy.xlsx export "
        "correctly separates these via the ModuleType field. No manual deduplication was required.",
    ))
    _spacer(doc)

    # 10.3: how the figures in the report were derived
    _heading(doc, "10.3 Analytical Approach", 2)
    window_months = trends.get('window_months', 24)
    window_start = trends.get('window_start', 'N/A')
    window_end = trends.get('window_end', 'N/A')
    _bullets((
        "Monthly trend analysis: Activities and events are aggregated by calendar month. "
        "Trend direction is estimated by comparing recent-period averages against prior-period averages.",
        f"Rolling two-year Safety Energy review: deeper trend and quality analysis uses a {window_months}-month "
        f"window from {window_start} to {window_end}, anchored to the latest Safety Energy record.",
        "Effectiveness analysis: Business unit-level aggregates and overall monthly correlations "
        "are used as proxies for effectiveness. Correlation is computed using Pearson r.",
        "At-risk theme extraction: Free-text fields are scanned using a predefined keyword "
        "dictionary (see config.py). Frequency counts are combined across sources with a "
        "2× weight applied to event-source mentions (lagging signal).",
        "Business Unit focus: BUs are flagged as 'declining' if second-half activity volume "
        "is less than 70% of first-half volume within the analysis window.",
        "Leading-activity quality scoring: records are scored using practical proxies including text richness, specificity, "
        "risk recognition, action/follow-up language, learning evidence, and penalties for generic or duplicated wording.",
    ))
    _spacer(doc)

    # 10.4: caveats supplied with the analysis results
    _heading(doc, "10.4 Caveats and Limitations", 2)
    _bullets(results.caveats)

    _spacer(doc)
    generated_at = datetime.now().strftime('%d %B %Y at %H:%M')
    _para(doc, f"Report generated: {generated_at}", size_pt=9, colour=GREY)


# ─────────────────────────────────────────────────────────────────────────────
# Main entry point
# ─────────────────────────────────────────────────────────────────────────────

def build_report(results: AnalysisResults, output_dir: str) -> str:
    """
    Build the full SHEQ DOCX report from an AnalysisResults object.

    The output directory is created if it does not exist. The file is named
    ``SHEQ_Safety_Performance_<YYYYMMDD_HHMM>.docx`` using the current local
    time, so two reports built within the same minute share a file name and
    the later one replaces the earlier.

    Parameters
    ----------
    results    : output of analysis_engine.run_full_analysis
    output_dir : directory to write the .docx file into; a relative path is
                 resolved against the current working directory

    Returns
    -------
    Absolute path to the generated .docx file.
    """
    # Resolve up front so the returned path is absolute, as documented, and
    # stays valid even if the caller changes the working directory later.
    output_dir = os.path.abspath(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    doc = Document()
    _bootstrap_styles(doc)

    log.info("Building DOCX report...")

    # Sections are appended in report order; each builder writes into doc.
    section_builders = (
        _title_page,
        _section_executive_summary,
        _section_data_quality,
        _section_events,
        _section_leading_overview,
        _section_effectiveness,
        _section_at_risk,
        _section_se_events,
        _section_focus_areas,
        _section_recommendations,
        _section_methodology,
    )
    for build_section in section_builders:
        build_section(doc, results)

    output_path = os.path.join(
        output_dir,
        f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.docx",
    )
    doc.save(output_path)
    log.info("Report saved to %s", output_path)
    return output_path