"""
report_builder.py — DOCX report generation for the SHEQ Analysis Tool.

Takes an AnalysisResults object from analysis_engine and produces a
fully-formatted DOCX report following the Ventia brand guidelines in DESIGN.md.

Public API
----------
build_report(results: AnalysisResults, output_dir: str) -> str
    Returns the path to the generated .docx file.
"""
from __future__ import annotations

import logging
import os
from datetime import datetime
from typing import Any

import pandas as pd
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Cm, Inches, Pt, RGBColor

from analysis_engine import AnalysisResults
from config import (
    CONSEQUENCE_ORDER,
    DEEP_BLUE,
    SKY_BLUE,
    DARK_GREEN,
    MUTED,
    AMBER,
    RED,
)

log = logging.getLogger(__name__)

# ── Brand RGBColor objects ────────────────────────────────────────────────────
NAVY = RGBColor(0x0B, 0x32, 0x54)   # Deep Blue
TEAL = RGBColor(0x13, 0xB5, 0xEA)   # Sky Blue
GREEN = RGBColor(0x00, 0x6E, 0x47)  # Dark Green
GREY = RGBColor(0x64, 0x74, 0x8B)   # Muted / slate grey
ALERT = RGBColor(0xDC, 0x26, 0x26)  # Red


# ─────────────────────────────────────────────────────────────────────────────
# Low-level DOCX helpers
# ─────────────────────────────────────────────────────────────────────────────

def _shading(cell, hex_color: str) -> None:
    """Apply background fill to a table cell.

    ``hex_color`` is a six-digit hex string without a leading ``#``
    (e.g. ``"0b3254"``), as used by every caller in this module.
    """
    # A <w:shd> element carrying the fill colour is appended to the cell's
    # properties; nsdecls("w") supplies the namespace declaration that
    # parse_xml needs to resolve the "w:" prefix.
    shd = parse_xml(
        f'<w:shd {nsdecls("w")} w:fill="{hex_color}"/>'
    )
    cell._tc.get_or_add_tcPr().append(shd)


def _run(para, text: str, bold: bool = False, size_pt: int = 11,
         colour: RGBColor | str = NAVY, italic: bool = False) -> None:
    """Append a brand-styled run of *text* to *para*.

    ``colour`` may be an ``RGBColor`` or a hex string (with or without a
    leading ``#``); strings are converted before being applied.
    """
    run = para.add_run(text)
    run.bold = bold
    run.italic = italic
    run.font.size = Pt(size_pt)
    if isinstance(colour, str):
        colour = RGBColor.from_string(colour.replace("#", "").upper())
    run.font.color.rgb = colour
    run.font.name = "Source Sans Pro"


def _heading(doc: Document, text: str, level: int) -> None:
    """Add a heading of the given level to the document."""
    doc.add_heading(text, level=level)


def _para(doc: Document, text: str = "", bold: bool = False,
          size_pt: int = 11, colour: RGBColor = NAVY) -> None:
    """Add a new paragraph containing a single styled run."""
    p = doc.add_paragraph()
    _run(p, text, bold=bold, size_pt=size_pt, colour=colour)


def _bullet(doc: Document, text: str, size_pt: int = 11) -> None:
    """Add a "List Bullet" paragraph in the default brand colour."""
    p = doc.add_paragraph(style="List Bullet")
    _run(p, text, size_pt=size_pt, colour=NAVY)


def _callout(doc: Document, title: str, text: str, fill: str = "f0f5fa",
             accent: RGBColor = TEAL) -> None:
    """Add a shaded single-cell table used as a callout box.

    The cell holds a bold *title* in the *accent* colour followed by the
    body *text* in a second paragraph.
    """
    table = doc.add_table(rows=1, cols=1)
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"
    cell = table.cell(0, 0)
    _shading(cell, fill)
    cell.text = ""
    p1 = cell.paragraphs[0]
    _run(p1, title, bold=True, size_pt=11, colour=accent)
    p2 = cell.add_paragraph()
    _run(p2, text, size_pt=10, colour=NAVY)


def _metric_cards(doc: Document, cards: list[tuple[str, str]], cols: int = 4,
                  fill: str = "f0f5fa") -> None:
    """Lay out ``(title, value)`` pairs as a grid of centred metric cards.

    Cells alternate between *fill* and white. Nothing is added when
    *cards* is empty. Trailing cells of an incomplete last row are shaded
    but left blank.
    """
    if not cards:
        return
    rows = (len(cards) + cols - 1) // cols  # ceiling division
    table = doc.add_table(rows=rows, cols=cols)
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"
    idx = 0
    for r in range(rows):
        for c in range(cols):
            cell = table.cell(r, c)
            cell.text = ""
            _shading(cell, fill if idx % 2 == 0 else "ffffff")
            if idx < len(cards):
                title, value = cards[idx]
                p1 = cell.paragraphs[0]
                _run(p1, title, bold=False, size_pt=8, colour=GREY)
                p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
                p2 = cell.add_paragraph()
                _run(p2, value, bold=True, size_pt=16, colour=NAVY)
                p2.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # NOTE(review): counter assumed to advance once per cell —
            # confirm against the unmangled source.
            idx += 1


def _add_table(
    doc: Document,
    headers: list[str],
    rows: list[list[str]],
    col_widths: list[float],  # inches
    header_fill: str = "0b3254",
    alt_fill: str = "f0f5fa",
) -> None:
    """Add a brand-styled table with a Deep Blue header row.

    Data rows alternate between *alt_fill* and white. ``None`` cell values
    are rendered as an em dash. ``col_widths`` are applied per cell, in
    inches, for as many columns as exist in each row.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"

    # Header row
    for i, h in enumerate(headers):
        cell = table.rows[0].cells[i]
        cell.text = ""
        p = cell.paragraphs[0]
        run = p.add_run(h)
        run.bold = True
        run.font.size = Pt(9)
        run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        run.font.name = "Source Sans Pro"
        _shading(cell, header_fill)

    # Data rows
    for ri, row in enumerate(rows):
        bg = alt_fill if ri % 2 == 0 else "ffffff"
        for ci, val in enumerate(row):
            cell = table.rows[ri + 1].cells[ci]
            cell.text = ""
            p = cell.paragraphs[0]
            run = p.add_run(str(val) if val is not None else "—")
            run.font.size = Pt(9)
            run.font.name = "Source Sans Pro"
            _shading(cell, bg)

    # Column widths
    for row in table.rows:
        for ci, w in enumerate(col_widths):
            if ci < len(row.cells):
                row.cells[ci].width = Inches(w)


def _add_chart(doc: Document, charts: dict[str, str], key: str,
               width_in: float = 5.5) -> None:
    """Insert a chart image if it exists.

    When *key* is missing from *charts* or the file is not on disk, a small
    grey placeholder paragraph is written instead.
    """
    path = charts.get(key)
    if path and os.path.exists(path):
        doc.add_picture(path, width=Inches(width_in))
    else:
        _para(doc, f"[Chart '{key}' not available]", colour=GREY, size_pt=9)


def _spacer(doc: Document) -> None:
    """Add an empty paragraph as vertical spacing."""
    doc.add_paragraph("")


def _page_break(doc: Document) -> None:
    """Add a page break."""
    doc.add_page_break()


# ─────────────────────────────────────────────────────────────────────────────
# Document bootstrap
# ─────────────────────────────────────────────────────────────────────────────

def _bootstrap_styles(doc: Document) -> None:
    """Apply brand typography to the Document's built-in styles.

    Also sets the page margins of the first section.
    """
    style = doc.styles["Normal"]
    style.font.name = "Source Sans Pro"
    style.font.size = Pt(11)

    for level, size, colour in [
        (1, 16, NAVY),
        (2, 13, TEAL),
        (3, 11, NAVY),
    ]:
        hs = doc.styles[f"Heading {level}"]
        hs.font.name = "Source Sans Pro"
        hs.font.size = Pt(size)
        hs.font.color.rgb = colour
        hs.font.bold = True

    sec = doc.sections[0]
    sec.top_margin = Cm(1.5)
    sec.bottom_margin = Cm(1.4)
    sec.left_margin = Cm(1.6)
    sec.right_margin = Cm(1.6)


# ─────────────────────────────────────────────────────────────────────────────
# Title page
# ─────────────────────────────────────────────────────────────────────────────

def _title_page(doc: Document, results: AnalysisResults) -> None:
    """Write the title banner, scope callout and coverage cards."""
    dq = results.data_quality
    ev_from = dq.get("events", {}).get("date_from", "N/A")
    ev_to = dq.get("events", {}).get("date_to", "N/A")
    se_from = dq.get("safety_energy", {}).get("date_from", "N/A")
    se_to = dq.get("safety_energy", {}).get("date_to", "N/A")

    banner = doc.add_table(rows=1, cols=1)
    banner.style = "Table Grid"
    cell = banner.cell(0, 0)
    _shading(cell, "0b3254")
    cell.text = ""
    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _run(p, "SHEQ Safety Performance Report", bold=True, size_pt=26,
         colour=RGBColor(0xFF, 0xFF, 0xFF))
    p2 = cell.add_paragraph()
    _run(p2, "Safety Energy, event risk, and leadership focus areas",
         size_pt=13, colour=RGBColor(0xD7, 0xF2, 0xFF))

    _spacer(doc)
    _callout(
        doc,
        "Report Scope",
        f"Events window: {ev_from} – {ev_to}. Leading activity window: {se_from} – {se_to}. "
        "Built for senior leaders as a concise decision-support pack rather than a compliance summary.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)
    _metric_cards(doc, [
        ("Generated", datetime.now().strftime("%d %b %Y")),
        ("Events Coverage", f"{ev_from} to {ev_to}"),
        ("Safety Energy Coverage", f"{se_from} to {se_to}"),
        ("Audience", "Executive / Board"),
    ], cols=2)
    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 1. Executive Summary
# ─────────────────────────────────────────────────────────────────────────────

def _section_executive_summary(doc: Document, results: AnalysisResults) -> None:
    """Write section 1: headline metrics, key messages, charts and actions."""
    _heading(doc, "1. Executive Summary", 1)

    ev = results.events_summary
    lead = results.leading_summary
    trends = results.trends
    recs = results.recommendations
    totals = lead.get("totals", {})

    _callout(
        doc,
        "Leadership Readout",
        "This summary highlights where event risk is concentrated, where leading activities appear strongest or weakest, "
        "and what senior leaders should prioritise next.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    _metric_cards(doc, [
        ("Total Events", str(ev.get("total", 0))),
        ("Moderate+ Events", f"{ev.get('serious_count', 0)}"),
        ("Events / Month", f"{ev.get('events_per_month', 0):.1f}"),
        ("Motor Vehicle Events", str(ev.get("motor_vehicle", {}).get("count", 0))),
        ("LLCs", str(totals.get("LLC", 0))),
        ("CCCs", str(totals.get("CCC", 0))),
        ("OCCs", str(totals.get("OCC", 0))),
        ("CCC Avg Quality", f"{trends.get('activity_insights', {}).get('CCC', {}).get('avg_quality', 0):.1f}"),
    ], cols=4)
    _spacer(doc)

    _heading(doc, "1.1 What Leaders Should Know", 2)
    for rec in trends.get("executive_summary", [])[:4]:
        _bullet(doc, rec)
    _spacer(doc)

    _heading(doc, "1.2 Board Visual Snapshot", 2)
    _add_chart(doc, results.charts, "quality_mix", 6.0)
    _spacer(doc)
    _add_chart(doc, results.charts, "project_quadrant", 6.0)
    _spacer(doc)

    _heading(doc, "1.3 Priority Actions", 2)
    for rec in recs[:5]:
        _callout(doc, "Recommended Action", rec, fill="f7fbff", accent=NAVY)
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 2. Data Quality
# ─────────────────────────────────────────────────────────────────────────────

def _section_data_quality(doc: Document, results: AnalysisResults) -> None:
    """Write section 2: per-source row counts, date ranges and null counts."""
    _heading(doc, "2. Data Quality and Coverage", 1)
    dq = results.data_quality

    _para(doc,
          "This section summarises the completeness and date coverage of each data source. "
          "Any gaps identified here may affect the reliability of subsequent analysis sections.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # Each entry: (heading, data dict, [(display label, key in data dict)])
    sources = [
        ("Events", dq.get("events", {}),
         [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
          ("Null — Event Type", "null_event_type"),
          ("Null — Consequence", "null_consequence"),
          ("Null — Business Unit", "null_business_unit"),
          ("Null — Root Cause", "null_root_cause")]),
        ("Safety Energy", dq.get("safety_energy", {}),
         [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
          ("Null — Leader", "null_leader"),
          ("Null — Business Unit", "null_bu")]),
        ("LLC Data", dq.get("llc", {}),
         [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"),
          ("Null — Topic", "null_topic"),
          ("Null — Leader", "null_leader")]),
    ]

    for label, data, fields in sources:
        _heading(doc, label, 2)
        rows = [[f, str(data.get(k, "N/A"))] for f, k in fields]
        _add_table(doc, ["Field", "Value"], rows, [3.0, 3.5])
        _spacer(doc)

        # Activity type breakdown for Safety Energy
        if label == "Safety Energy":
            breakdown = data.get("type_breakdown", {})
            if breakdown:
                br_rows = [[k, str(v)] for k, v in sorted(breakdown.items())]
                _add_table(doc, ["Activity Type", "Count"], br_rows, [3.0, 3.5])
                _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 3. Events Analysis
# ─────────────────────────────────────────────────────────────────────────────

def _section_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 3: event trends, breakdown tables and hotspots.

    Sub-sections 3.4–3.8 are written only when their data is present.
    """
    _heading(doc, "3. Events Analysis", 1)
    ev = results.events_summary
    chts = results.charts

    _callout(
        doc,
        "Event Story",
        "This section focuses on where event burden is building, where serious outcomes are concentrated, and what special risk signals are visible in timing and motor vehicle data.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # Monthly trend chart
    _heading(doc, "3.1 Monthly Trend", 2)
    _add_chart(doc, chts, "events_monthly", 6.0)
    _spacer(doc)

    # Consequence chart
    _heading(doc, "3.2 Consequence Severity", 2)
    _add_chart(doc, chts, "consequence", 5.5)
    _spacer(doc)

    # Event type table
    _heading(doc, "3.3 Event Type Breakdown", 2)
    event_types = ev.get("event_type_counts", {})
    # Clamp to at least 1 so the percentage column never divides by zero.
    total_events = max(ev.get("total", 1), 1)
    et_rows = [[t, str(c), f"{c/total_events*100:.1f}%"]
               for t, c in event_types.items()]
    _add_table(doc, ["Event Type", "Count", "%"], et_rows, [3.0, 1.2, 1.0])
    _spacer(doc)

    # CRP table
    crp = ev.get("crp_counts", {})
    if crp:
        _heading(doc, "3.4 Critical Risk Protocols Involved", 2)
        crp_rows = [[c, str(v)] for c, v in crp.items()]
        _add_table(doc, ["CRP", "Count"], crp_rows, [4.0, 1.5])
        _spacer(doc)

    # Root cause table
    rc = ev.get("root_cause_counts", {})
    if rc:
        _heading(doc, "3.5 Root Cause Categories", 2)
        rc_rows = [[r, str(v)] for r, v in rc.items()]
        _add_table(doc, ["Root Cause", "Count"], rc_rows, [4.0, 1.5])
        _spacer(doc)

    serious_projects = ev.get("serious_projects", {})
    serious_locations = ev.get("serious_locations", {})
    if serious_projects or serious_locations:
        _heading(doc, "3.6 Serious Event Hotspots", 2)
        _para(doc,
              "These are the projects and locations with the highest counts of moderate, major, or substantial events in the analysis window.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chts, "serious_hotspots", 5.8)
        _spacer(doc)
        if serious_projects:
            rows = [[k, str(v)] for k, v in serious_projects.items()]
            _add_table(doc, ["Project", "Serious Events"], rows, [4.0, 1.5])
            _spacer(doc)
        if serious_locations:
            rows = [[k, str(v)] for k, v in serious_locations.items()]
            _add_table(doc, ["Location", "Serious Events"], rows, [4.0, 1.5])
            _spacer(doc)

    time_buckets = ev.get("serious_time_buckets", {})
    if time_buckets:
        _heading(doc, "3.7 Serious Event Timing", 2)
        _para(doc,
              f"Time-of-day information was available for {ev.get('serious_time_coverage_pct', 0):.1f}% of serious events.",
              size_pt=11, colour=NAVY)
        rows = [[bucket, str(count)] for bucket, count in time_buckets.items()]
        _add_table(doc, ["Time of Day", "Serious Events"], rows, [3.5, 1.5])
        _spacer(doc)

    motor = ev.get("motor_vehicle", {})
    if motor.get("count", 0) > 0:
        _heading(doc, "3.8 Motor Vehicle Events", 2)
        mv_rows = [
            ["Total Motor Vehicle Events", str(motor.get("count", 0))],
            ["Share of All Events", f"{motor.get('pct_total', 0):.1f}%"],
            ["Moderate+ MV Events", str(motor.get("serious_count", 0))],
            ["Serious Rate Within MV Events", f"{motor.get('serious_pct_within_mve', 0):.1f}%"],
        ]
        _add_table(doc, ["Metric", "Value"], mv_rows, [3.5, 2.0])
        _spacer(doc)
        if motor.get("top_projects"):
            rows = [[k, str(v)] for k, v in motor.get("top_projects", {}).items()]
            _add_table(doc, ["Project", "MV Events"], rows, [4.0, 1.5])
            _spacer(doc)
        # Up to four entries from each motor-vehicle pattern category.
        rows = []
        for label, values in [
            ("Road Type", motor.get("road_types", {})),
            ("Road Condition", motor.get("conditions", {})),
            ("Vehicle Type", motor.get("vehicle_types", {})),
        ]:
            for key, val in list(values.items())[:4]:
                rows.append([label, key, str(val)])
        if rows:
            _add_table(doc, ["Category", "Pattern", "Count"], rows, [1.5, 3.0, 1.0])
            _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 4. Leading Activity Overview
# ─────────────────────────────────────────────────────────────────────────────

def _section_leading_overview(doc: Document, results: AnalysisResults) -> None:
    """Write section 4: leading-activity volumes, topics and quality view."""
    _heading(doc, "4. Safety Energy — Leading Activity Overview", 1)
    lead = results.leading_summary
    trends = results.trends
    chts = results.charts

    _callout(
        doc,
        "Leading Activity Readout",
        "Safety Energy combines LLC, CCC, and OCC activity. The deeper two-year review is designed to show not just volume, "
        "but whether the records look rich, preventive, and useful for leadership learning.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Activity mix donut
    _heading(doc, "4.1 Activity Type Mix", 2)
    _add_chart(doc, chts, "activity_mix", 4.5)
    _spacer(doc)

    # Totals table
    totals = lead.get("totals", {})
    avg_at_risk = lead.get("avg_at_risk", {})
    if totals:
        rows = [
            [atype, str(totals.get(atype, 0)), f"{avg_at_risk.get(atype, 0):.2f}"]
            for atype in ["LLC", "CCC", "OCC"]
            if totals.get(atype, 0) > 0
        ]
        _add_table(
            doc,
            ["Activity Type", "Total Count", "Avg At-Risk Aspects per Activity"],
            rows,
            [2.5, 1.5, 3.0],
        )
        _spacer(doc)

    # Monthly trend chart
    _heading(doc, "4.2 Monthly Activity Trend", 2)
    _para(doc, f"Overall trend: {lead.get('activity_trend', 'N/A')}",
          size_pt=11, colour=GREY)
    _add_chart(doc, chts, "leading_monthly", 6.0)
    _spacer(doc)

    # BU breakdown
    _heading(doc, "4.3 Activity by Business Unit", 2)
    bu_by_type = lead.get("bu_by_type", {})
    all_bus: list[str] = sorted(set(
        bu for d in bu_by_type.values() for bu in d.keys()
    ))
    if all_bus:
        rows = [
            [bu] + [str(bu_by_type.get(at, {}).get(bu, 0)) for at in ["LLC", "CCC", "OCC"]]
            for bu in all_bus
        ]
        _add_table(doc, ["Business Unit", "LLC", "CCC", "OCC"], rows,
                   [2.8, 1.0, 1.0, 1.0])
        _spacer(doc)

    # LLC topic breakdown (from LLC_Data)
    _heading(doc, "4.4 LLC Conversation Topics", 2)
    _para(doc,
          "The following topics were most frequently recorded in Leader Learning Conversations. "
          "Topic coverage indicates where leaders are directing their field conversations.",
          size_pt=11, colour=NAVY)
    _add_chart(doc, chts, "llc_topics", 5.5)
    _spacer(doc)

    top_topics = lead.get("top_topics", {})
    if top_topics:
        rows = [[k, str(v)] for k, v in list(top_topics.items())[:12]]
        _add_table(doc, ["Topic", "Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # CRP focus
    _heading(doc, "4.5 CRP Focus Areas in LLCs", 2)
    _add_chart(doc, chts, "crp_focus", 5.5)
    _spacer(doc)

    # Top leaders
    _heading(doc, "4.6 Most Active Leaders (LLC)", 2)
    _add_chart(doc, chts, "top_leaders", 5.5)
    _spacer(doc)

    top_leaders = lead.get("top_leaders", {})
    if top_leaders:
        rows = [[leader, str(c)] for leader, c in list(top_leaders.items())[:12]]
        _add_table(doc, ["Leader", "LLC Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # Two-year trend and quality view
    _heading(doc, "4.7 Rolling Two-Year Trend and Quality View", 2)
    _para(doc,
          f"This view uses a rolling two-year Safety Energy window from "
          f"{trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}. "
          "It is designed to answer not just whether activities were completed, but whether "
          "the records suggest meaningful learning, risk recognition, and follow-up quality.",
          size_pt=11, colour=NAVY)
    _para(doc, trends.get("proxy_note", ""), size_pt=10, colour=GREY)
    _add_chart(doc, chts, "quality_trend", 6.0)
    _spacer(doc)
    _add_chart(doc, chts, "quality_mix", 6.0)
    _spacer(doc)

    quality_rows = trends.get("quality_by_type", [])
    if quality_rows:
        rows = [
            [
                r.get("activity_type", ""),
                str(r.get("count", 0)),
                f"{r.get('avg_quality', 0):.1f}",
                f"{r.get('avg_input_depth', 0):.1f}",
                f"{r.get('meaningful_pct', 0):.1f}%",
                f"{r.get('high_value_pct', 0):.1f}%",
                f"{r.get('shallow_pct', 0):.1f}%",
                f"{r.get('follow_up_pct', 0):.1f}%",
            ]
            for r in quality_rows
        ]
        _add_table(
            doc,
            ["Type", "Count", "Avg Quality", "Avg Input Depth", "Meaningful",
             "High Value", "Shallow", "Follow-up"],
            rows,
            [0.8, 0.8, 0.9, 1.0, 0.9, 0.9, 0.9, 0.9],
        )
        _spacer(doc)

    input_depth = trends.get("input_depth", {})
    if input_depth.get("correlation") is not None:
        _heading(doc, "4.8 Input Depth as a Supporting Quality Metric", 2)
        _para(doc,
              f"Across the two-year Safety Energy window, input depth and quality score are correlated at r = {input_depth.get('correlation'):.2f}. "
              f"{input_depth.get('note', '')}",
              size_pt=11, colour=NAVY)
        by_band = input_depth.get("by_band", [])
        if by_band:
            rows = [
                [
                    r.get("band", ""),
                    str(r.get("count", 0)),
                    f"{r.get('avg_input_depth', 0):.1f}",
                    f"{r.get('avg_quality', 0):.1f}",
                    f"{r.get('meaningful_pct', 0):.1f}%",
                    f"{r.get('high_value_pct', 0):.1f}%",
                    f"{r.get('shallow_pct', 0):.1f}%",
                ]
                for r in by_band
            ]
            _add_table(
                doc,
                ["Band", "Count", "Avg Input Depth", "Avg Quality", "Meaningful",
                 "High Value", "Shallow"],
                rows,
                [0.9, 0.8, 1.0, 0.9, 0.9, 0.9, 0.9],
            )
            _spacer(doc)

    top_themes = trends.get("top_themes", {})
    if top_themes:
        _para(doc, "Most common recurring themes in Safety Energy narratives:",
              bold=True, size_pt=11, colour=NAVY)
        for theme, count in list(top_themes.items())[:5]:
            _bullet(doc, f"{theme}: {count} mentions")
        _spacer(doc)

    # Heading previously read "OCCC"; the activity type is "OCC" everywhere else.
    _heading(doc, "4.9 CCC / OCC / LLC Value Signals", 2)
    activity_insights = trends.get("activity_insights", {})
    for atype in ["CCC", "OCC", "LLC"]:
        insight = activity_insights.get(atype, {})
        if not insight:
            continue
        _para(
            doc,
            f"{atype}: average quality {insight.get('avg_quality', 0):.1f}/100, "
            f"average input depth {insight.get('avg_input_depth', 0):.1f}/100, "
            f"{insight.get('preventive_pct', 0):.1f}% preventive signal, "
            f"{insight.get('reactive_pct', 0):.1f}% reactive signal, "
            f"{insight.get('repetitive_pct', 0):.1f}% repetitive signal, "
            f"{insight.get('shallow_pct', 0):.1f}% shallow.",
            size_pt=11,
            colour=NAVY,
        )
        depth = insight.get("input_depth", {})
        if depth.get("correlation") is not None:
            _para(doc,
                  f"For {atype}, input depth vs quality correlation is r = {depth.get('correlation'):.2f}.",
                  size_pt=10, colour=GREY)
        top_modules = insight.get("top_modules", {})
        if top_modules:
            _para(doc, f"Top {atype} focus areas:", bold=True, size_pt=10, colour=GREY)
            for label, count in list(top_modules.items())[:4]:
                _bullet(doc, f"{label}: {count}")
        _spacer(doc)

    _page_break(doc)


# ─────────────────────────────────────────────────────────────────────────────
# 5.
Effectiveness of Leading Activities # ───────────────────────────────────────────────────────────────────────────── def _section_effectiveness(doc: Document, results: AnalysisResults) -> None: _heading(doc, "5. Effectiveness of Leading Activities", 1) eff = results.effectiveness chts = results.charts _para(doc, "This section examines whether leading activity patterns appear associated with " "event outcomes at a business unit and portfolio level. All findings are associative " "only — correlation does not imply causation.", size_pt=11, colour=NAVY) _spacer(doc) # Monthly overlay chart _heading(doc, "5.1 Monthly Activities vs Events Overlay", 2) _add_chart(doc, chts, "overlay", 6.0) _spacer(doc) # Correlation note _heading(doc, "5.2 Statistical Association", 2) _para(doc, eff.get("corr_note", "N/A"), size_pt=11, colour=NAVY) _spacer(doc) # BU comparison chart _heading(doc, "5.3 Activities vs Events by Business Unit", 2) _add_chart(doc, chts, "bu_comparison", 5.5) _spacer(doc) # BU table bu_table = eff.get("bu_table", []) if bu_table: rows = [ [r.get("business_unit", ""), str(r.get("activities", 0)), str(r.get("events", 0))] for r in bu_table ] _add_table(doc, ["Business Unit", "Leading Activities", "Events"], rows, [3.0, 2.0, 1.5]) _spacer(doc) # Pattern commentary high_both = eff.get("high_activity_high_events", []) high_acts = eff.get("high_activity_low_events", []) if high_both: _heading(doc, "5.4 High Activity / High Events — Pattern of Interest", 2) _para(doc, "The following Business Units recorded both above-median leading-activity volumes " "and above-median event counts. This may indicate reactive activity patterns where " "engagement is increasing in response to events rather than preventing them. 
" "Further investigation is recommended.", size_pt=11, colour=NAVY) for bu in high_both: _bullet(doc, bu) _spacer(doc) if high_acts: _heading(doc, "5.5 High Activity / Low Events — Positive Signal", 2) _para(doc, "The following Business Units recorded above-median leading-activity volumes " "and below-median event counts. This pattern is consistent with leading activities " "having a preventive effect, though this cannot be confirmed from available data.", size_pt=11, colour=NAVY) for bu in high_acts: _bullet(doc, bu) _spacer(doc) _para(doc, eff.get("note", ""), size_pt=10, colour=GREY) _page_break(doc) # ───────────────────────────────────────────────────────────────────────────── # 6. At-Risk Behaviours # ───────────────────────────────────────────────────────────────────────────── def _section_at_risk(doc: Document, results: AnalysisResults) -> None: _heading(doc, "6. At-Risk Behaviours", 1) ar = results.at_risk chts = results.charts _para(doc, "At-risk behaviour themes are identified by analysing free-text fields across all " "three data sources (Events descriptions, LLC conversation notes, and Safety Energy " "observations) using keyword matching against known risk categories.", size_pt=11, colour=NAVY) _spacer(doc) _heading(doc, "6.1 Combined Theme Frequency", 2) _add_chart(doc, chts, "at_risk_themes", 5.5) _spacer(doc) combined = ar.get("combined_themes", {}) if combined: rows = [[k, str(v)] for k, v in combined.items()] _add_table( doc, ["Risk Theme", "Weighted Frequency"], rows, [3.5, 2.0], ) _spacer(doc) # LLC theme focus vs event themes _heading(doc, "6.2 LLC Conversation Topics vs Event Themes", 2) llc_themes = ar.get("llc_themes", {}) ev_themes = ar.get("event_themes", {}) if llc_themes or ev_themes: all_themes = sorted( set(list(llc_themes.keys()) + list(ev_themes.keys())) ) rows = [ [t, str(llc_themes.get(t, 0)), str(ev_themes.get(t, 0))] for t in all_themes ] _add_table(doc, ["Risk Theme", "LLC Mentions", "Event Mentions"], rows, [3.0, 1.5, 1.5]) 
_spacer(doc) # Alignment gap gap = ar.get("gap_themes", []) if gap: _heading(doc, "6.3 Topic Alignment Gaps", 2) _para(doc, "The following risk themes appear among the top event themes but are under-represented " "in LLC conversation topics. This may indicate a gap between where safety conversations " "are focused and where actual events are occurring.", size_pt=11, colour=NAVY) for theme in gap: _bullet(doc, theme) _spacer(doc) _para(doc, ar.get("note", ""), size_pt=10, colour=GREY) _page_break(doc) # ───────────────────────────────────────────────────────────────────────────── # 7. Safety Energy ↔ Events Relationship # ───────────────────────────────────────────────────────────────────────────── def _section_se_events(doc: Document, results: AnalysisResults) -> None: _heading(doc, "7. Relationship Between Safety Energy and Events", 1) rel = results.se_events_rel chts = results.charts _callout( doc, "Coverage vs Outcome", "This section links leading activity coverage to event burden so leaders can see which business units, projects, and locations appear strongest, and which need direct intervention.", fill="f7fbff", accent=TEAL, ) _spacer(doc) # BU comparison table _heading(doc, "7.1 Business Unit Comparison", 2) bu_comp = rel.get("bu_comparison", []) if bu_comp: rows = [ [str(r.get("business_unit", "")), str(int(r.get("activities", 0))), str(int(r.get("events", 0))), str(r.get("ratio", "—") if r.get("ratio", "—") is not None else "—")] for r in bu_comp ] _add_table(doc, ["Business Unit", "Activities", "Events", "Activity:Event Ratio"], rows, [2.5, 1.2, 1.0, 2.0]) _spacer(doc) project_comp = rel.get("project_comparison", {}) if project_comp.get("best") or project_comp.get("watch"): _heading(doc, "7.2 Project Performance Signals", 2) _para(doc, "These project comparisons use recorded Safety Energy activity against recorded events. 
They are intended as directional signals only, not league tables.", size_pt=11, colour=NAVY) _add_chart(doc, chts, "project_quadrant", 5.8) _spacer(doc) if project_comp.get("best"): rows = [ [str(r.get("project", "")), str(r.get("activities", 0)), str(r.get("events", 0)), str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] for r in project_comp.get("best", []) ] _add_table(doc, ["Project", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) _spacer(doc) if project_comp.get("watch"): _para(doc, "Projects needing leadership attention:", bold=True, size_pt=11, colour=NAVY) rows = [ [str(r.get("project", "")), str(r.get("activities", 0)), str(r.get("events", 0)), str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] for r in project_comp.get("watch", [])[:6] ] _add_table(doc, ["Project", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) _spacer(doc) location_comp = rel.get("location_comparison", {}) if location_comp.get("best") or location_comp.get("watch"): _heading(doc, "7.3 Location Performance Signals", 2) if location_comp.get("best"): rows = [ [str(r.get("location", "")), str(r.get("activities", 0)), str(r.get("events", 0)), str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] for r in location_comp.get("best", []) ] _add_table(doc, ["Location", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) _spacer(doc) if location_comp.get("watch"): _para(doc, "Locations needing leadership attention:", bold=True, size_pt=11, colour=NAVY) rows = [ [str(r.get("location", "")), str(r.get("activities", 0)), str(r.get("events", 0)), str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", 
"—") is not None else "—")] for r in location_comp.get("watch", [])[:6] ] _add_table(doc, ["Location", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) _spacer(doc) # Spike months spikes = rel.get("spike_months", []) if spikes: _heading(doc, "7.4 Event Spike Periods", 2) _para(doc, "The following periods recorded above-average event counts coinciding with " "below-average leading-activity volumes. These periods may warrant retrospective " "review.", size_pt=11, colour=NAVY) for m in spikes: _bullet(doc, m) _spacer(doc) # Topic alignment note _heading(doc, "7.5 Topic Alignment Observation", 2) _para(doc, rel.get("alignment_note", ""), size_pt=11, colour=NAVY) llc_top = rel.get("llc_top_topics", []) ev_top = rel.get("ev_top_rc", []) if llc_top: _para(doc, "Top LLC topics:", bold=True, size_pt=11, colour=NAVY) for t in llc_top: _bullet(doc, str(t)) if ev_top: _para(doc, "Top event root causes:", bold=True, size_pt=11, colour=NAVY) for t in ev_top: _bullet(doc, str(t)) _spacer(doc) _para(doc, rel.get("note", ""), size_pt=10, colour=GREY) _page_break(doc) # ───────────────────────────────────────────────────────────────────────────── # 8. Leader Focus Areas # ───────────────────────────────────────────────────────────────────────────── def _section_focus_areas(doc: Document, results: AnalysisResults) -> None: _heading(doc, "8. 
Leader Focus Areas", 1) fa = results.focus_areas trends = results.trends chts = results.charts _para(doc, "This section identifies Business Units and leaders that warrant specific leadership " "attention based on activity volumes, event rates, and observed trends.", size_pt=11, colour=NAVY) _spacer(doc) # BU summary table _heading(doc, "8.1 Business Unit Activity and Event Summary", 2) bu_summary = fa.get("bu_summary", []) if bu_summary: rows = [ [str(r.get("business_unit", "")), str(r.get("activities", 0)), str(r.get("events", 0))] for r in bu_summary ] _add_table(doc, ["Business Unit", "Leading Activities", "Events"], rows, [3.0, 2.0, 1.5]) _spacer(doc) # Declining BUs declining = fa.get("declining_bus", []) if declining: _heading(doc, "8.2 Declining Activity Units", 2) _para(doc, "The following Business Units recorded significantly lower leading-activity " "volumes in the second half of the analysis period compared to the first half. " "Leaders in these units should be engaged to understand and address the decline.", size_pt=11, colour=NAVY) for bu in declining: _bullet(doc, bu) _spacer(doc) leadership_focus = trends.get("leadership_focus", []) if leadership_focus: _heading(doc, "8.3 Leadership Watchouts from Two-Year Safety Energy Trends", 2) for item in leadership_focus[:5]: _bullet(doc, item) _spacer(doc) low_value_units = trends.get("high_volume_low_value", []) if low_value_units: _heading(doc, "8.4 High-Volume / Low-Value Hotspots", 2) _para(doc, "These areas are recording substantial activity volume, but the record quality signals " "suggest the activity may be drifting toward compliance-only completion rather than strong learning.", size_pt=11, colour=NAVY) _add_chart(doc, chts, "low_value_units", 5.8) _spacer(doc) rows = [ [ r.get("activity_type", ""), str(r.get("business_unit", "")), str(r.get("count", 0)), f"{r.get('avg_quality', 0):.1f}", f"{r.get('shallow_pct', 0):.1f}%", ] for r in low_value_units[:8] ] _add_table(doc, ["Type", "Business Unit", 
"Count", "Avg Quality", "Shallow"], rows, [0.9, 2.8, 0.9, 1.0, 1.0]) _spacer(doc) _page_break(doc) # ───────────────────────────────────────────────────────────────────────────── # 9. Recommended Actions # ───────────────────────────────────────────────────────────────────────────── def _section_recommendations(doc: Document, results: AnalysisResults) -> None: _heading(doc, "9. Recommended Actions", 1) _callout( doc, "Action Agenda", "These actions are generated directly from the event hotspots, leading-activity quality signals, and project/location performance patterns in the report.", fill="eef6fb", accent=TEAL, ) _spacer(doc) for i, rec in enumerate(results.recommendations[:10], 1): priority = "Immediate" if i <= 3 else "Next" _callout(doc, f"{priority} Priority", rec, fill="f7fbff" if i <= 3 else "ffffff", accent=ALERT if i <= 3 else NAVY) _spacer(doc) _spacer(doc) _heading(doc, "Ongoing Monitoring Recommendations", 2) for item in [ "Track leading-activity volumes monthly by Business Unit against a set target (e.g. " "minimum 4 LLCs per leader per month).", "Track CCC/OCC/LLC quality monthly using shallow-entry rate, follow-up rate, and average quality score.", "Review LLC topic coverage quarterly to ensure alignment with top event root causes.", "Re-run this full report monthly or quarterly as new data becomes available.", "Supplement quantitative analysis with qualitative review of LLC content quality.", "Use the Business Unit activity-to-event ratio table to guide where SHEQ advisor " "engagement should be prioritised.", ]: _bullet(doc, item) _page_break(doc) # ───────────────────────────────────────────────────────────────────────────── # 10. Methodology and Caveats # ───────────────────────────────────────────────────────────────────────────── def _section_methodology(doc: Document, results: AnalysisResults) -> None: _heading(doc, "10. 
def _section_methodology(doc: Document, results: AnalysisResults) -> None:
    """Write section 10, "Methodology and Caveats", into the document.

    Sub-sections, in order: 10.1 data sources (table), 10.2 activity type
    definitions (bullets), 10.3 analytical approach (bullets, including the
    rolling-window details read from ``results.trends``), and 10.4 caveats
    (one bullet per entry in ``results.caveats``). A small grey
    "Report generated" timestamp line closes the section.
    """

    def _bullets_then_gap(entries) -> None:
        # Each bulleted list in this section is followed by a single spacer.
        for entry in entries:
            _bullet(doc, entry)
        _spacer(doc)

    _heading(doc, "10. Methodology and Caveats", 1)
    trend_info = results.trends

    # ── 10.1 Data Sources ────────────────────────────────────────────────────
    _heading(doc, "10.1 Data Sources", 2)
    source_rows = [
        [
            "Events.xlsx",
            "Incident and event records exported from the Ventia safety management system. "
            "Covers all event types including injuries, motor vehicle events, close calls, "
            "environmental events, and nonconformances.",
        ],
        [
            "Safety_Energy.xlsx",
            "Combined leading activity export covering all three activity types: Leader Learning "
            "Conversations (LLC), Critical Control Checks (CCC), and Operational Control Checks (OCC). "
            "This is treated as the primary leading indicator data source.",
        ],
        [
            "LLC_Data.xlsx",
            "Supplementary LLC export providing richer free-text data (conversation topics, CRP focus, "
            "at-risk observations). Used primarily for theme and topic analysis. Record counts are "
            "closely aligned with the LLC records in Safety_Energy.xlsx.",
        ],
    ]
    _add_table(doc, ["Source", "Description"], source_rows, [2.0, 4.5])
    _spacer(doc)

    # ── 10.2 Activity Type Definitions ───────────────────────────────────────
    _heading(doc, "10.2 Activity Type Definitions", 2)
    _para(
        doc,
        "Safety Energy is the combined analytical domain. It encompasses three activity types:",
        size_pt=11,
        colour=NAVY,
    )
    activity_definitions = (
        "LLC (Leader Learning Conversation): A structured conversation between a leader and "
        "a worker or work group, focused on safety topics, risk identification, and critical "
        "controls.",
        "CCC (Critical Control Check): A field verification that critical controls for high-risk "
        "activities are in place and effective (e.g. working at height, hazardous energies).",
        "OCC (Operational Control Check): A broader operational inspection or check covering "
        "a range of work-area risk topics.",
        "Note: In some legacy documentation or older exports, the label 'OCC' was used broadly "
        "to cover what is now split into CCC and OCC. The current Safety_Energy.xlsx export "
        "correctly separates these via the ModuleType field. No manual deduplication was required.",
    )
    _bullets_then_gap(activity_definitions)

    # ── 10.3 Analytical Approach ─────────────────────────────────────────────
    _heading(doc, "10.3 Analytical Approach", 2)
    # Rolling-window parameters, with placeholders when the analysis did not supply them.
    window_months = trend_info.get('window_months', 24)
    window_start = trend_info.get('window_start', 'N/A')
    window_end = trend_info.get('window_end', 'N/A')
    approach_notes = (
        "Monthly trend analysis: Activities and events are aggregated by calendar month. "
        "Trend direction is estimated by comparing recent-period averages against prior-period averages.",
        f"Rolling two-year Safety Energy review: deeper trend and quality analysis uses a {window_months}-month "
        f"window from {window_start} to {window_end}, anchored to the latest Safety Energy record.",
        "Effectiveness analysis: Business unit-level aggregates and overall monthly correlations "
        "are used as proxies for effectiveness. Correlation is computed using Pearson r.",
        "At-risk theme extraction: Free-text fields are scanned using a predefined keyword "
        "dictionary (see config.py). Frequency counts are combined across sources with a "
        "2× weight applied to event-source mentions (lagging signal).",
        "Business Unit focus: BUs are flagged as 'declining' if second-half activity volume "
        "is less than 70% of first-half volume within the analysis window.",
        "Leading-activity quality scoring: records are scored using practical proxies including text richness, specificity, "
        "risk recognition, action/follow-up language, learning evidence, and penalties for generic or duplicated wording.",
    )
    _bullets_then_gap(approach_notes)

    # ── 10.4 Caveats and Limitations ─────────────────────────────────────────
    _heading(doc, "10.4 Caveats and Limitations", 2)
    _bullets_then_gap(results.caveats)

    generated_at = datetime.now().strftime('%d %B %Y at %H:%M')
    _para(doc, f"Report generated: {generated_at}", size_pt=9, colour=GREY)
def build_report(results: AnalysisResults, output_dir: str) -> str:
    """
    Build the full SHEQ DOCX report from an AnalysisResults object.

    Parameters
    ----------
    results    : output of analysis_engine.run_full_analysis
    output_dir : directory to write the .docx file into; it is created
                 (including missing parents) if it does not already exist

    Returns
    -------
    Absolute path to the generated .docx file.

    Notes
    -----
    The file name embeds a minute-resolution timestamp
    (``SHEQ_Safety_Performance_YYYYMMDD_HHMM.docx``), so two reports written
    to the same directory within the same minute share a file name.
    """
    os.makedirs(output_dir, exist_ok=True)

    doc = Document()
    _bootstrap_styles(doc)

    log.info("Building DOCX report...")

    # Sections are appended in reading order; each builder writes directly
    # into ``doc``.
    _title_page(doc, results)
    _section_executive_summary(doc, results)
    _section_data_quality(doc, results)
    _section_events(doc, results)
    _section_leading_overview(doc, results)
    _section_effectiveness(doc, results)
    _section_at_risk(doc, results)
    _section_se_events(doc, results)
    _section_focus_areas(doc, results)
    _section_recommendations(doc, results)
    _section_methodology(doc, results)

    file_name = f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.docx"
    # Resolve against the current working directory so the documented
    # "absolute path" return contract holds even when output_dir is relative.
    output_path = os.path.abspath(os.path.join(output_dir, file_name))

    doc.save(output_path)
    log.info("Report saved to %s", output_path)
    return output_path