Files

1158 lines
49 KiB
Python
Raw Permalink Normal View History

2026-04-20 15:23:18 +12:00
"""
report_builder.py — DOCX report generation for the SHEQ Analysis Tool.
Takes an AnalysisResults object from analysis_engine and produces a
fully-formatted DOCX report following the Ventia brand guidelines in
DESIGN.md.
Public API
----------
build_report(results: AnalysisResults, output_dir: str) -> str
Returns the path to the generated .docx file.
"""
from __future__ import annotations
import logging
import os
from datetime import datetime
from typing import Any
import pandas as pd
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Cm, Inches, Pt, RGBColor
from analysis_engine import AnalysisResults
from config import (
CONSEQUENCE_ORDER,
DEEP_BLUE, SKY_BLUE, DARK_GREEN, MUTED,
AMBER, RED,
)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# ── Brand RGBColor objects ────────────────────────────────────────────────────
# Pre-built python-docx colour objects used as text colours by the helpers
# and section builders below.
# NOTE(review): config also exports DEEP_BLUE / SKY_BLUE / DARK_GREEN / MUTED /
# RED; these literals presumably mirror those values — confirm they stay in sync.
NAVY = RGBColor(0x0B, 0x32, 0x54) # Deep Blue
TEAL = RGBColor(0x13, 0xB5, 0xEA) # Sky Blue
GREEN = RGBColor(0x00, 0x6E, 0x47) # Dark Green
GREY = RGBColor(0x64, 0x74, 0x8B) # Muted / slate grey
ALERT = RGBColor(0xDC, 0x26, 0x26) # Red
# ─────────────────────────────────────────────────────────────────────────────
# Low-level DOCX helpers
# ─────────────────────────────────────────────────────────────────────────────
def _shading(cell, hex_color: str) -> None:
    """Apply background fill to a table cell.

    Builds a ``w:shd`` element with ``w:val="clear"`` and ``w:fill`` set to
    ``hex_color`` and appends it to the cell's ``tcPr`` properties element.

    Args:
        cell: Table cell whose ``_tc`` element provides ``get_or_add_tcPr()``.
        hex_color: Colour value interpolated verbatim into the ``w:fill``
            attribute; callers in this module pass six-digit hex without ``#``.
    """
    shd = parse_xml(
        f'<w:shd {nsdecls("w")} w:fill="{hex_color}" w:val="clear"/>'
    )
    # A new element is appended on every call; an existing <w:shd> on the
    # same cell is not replaced.
    cell._tc.get_or_add_tcPr().append(shd)
def _run(para, text: str, bold: bool = False, size_pt: int = 11,
         colour: RGBColor = NAVY, italic: bool = False) -> None:
    """Append ``text`` to ``para`` as a run set in Source Sans Pro.

    Args:
        para: Paragraph that receives the new run.
        text: Text of the run.
        bold: Whether the run is bold.
        size_pt: Font size in points.
        colour: Text colour. A string is also accepted: any ``#`` characters
            are removed, the rest is upper-cased and parsed with
            ``RGBColor.from_string``.
        italic: Whether the run is italic.
    """
    styled = para.add_run(text)
    styled.bold = bold
    styled.italic = italic

    font = styled.font
    font.size = Pt(size_pt)
    # Hex strings are normalised to an RGBColor before being applied.
    rgb = colour
    if isinstance(rgb, str):
        rgb = RGBColor.from_string(rgb.replace("#", "").upper())
    font.color.rgb = rgb
    font.name = "Source Sans Pro"
def _heading(doc: Document, text: str, level: int) -> None:
    """Add a heading with the given text and level to the document.

    Delegates to ``doc.add_heading``; the result is not returned.
    """
    doc.add_heading(text, level=level)
def _para(doc: Document, text: str = "", bold: bool = False,
          size_pt: int = 11, colour: RGBColor = NAVY) -> None:
    """Add a new paragraph containing a single styled run.

    Args:
        doc: Document that receives the paragraph.
        text: Paragraph text; an empty string still produces a paragraph
            with one (empty) run.
        bold: Forwarded to ``_run``.
        size_pt: Font size in points, forwarded to ``_run``.
        colour: Text colour, forwarded to ``_run``.
    """
    p = doc.add_paragraph()
    _run(p, text, bold=bold, size_pt=size_pt, colour=colour)
def _bullet(doc: Document, text: str, size_pt: int = 11) -> None:
    """Add a paragraph in the "List Bullet" style with NAVY text.

    Args:
        doc: Document that receives the bullet.
        text: Bullet text.
        size_pt: Font size in points, forwarded to ``_run``.
    """
    p = doc.add_paragraph(style="List Bullet")
    _run(p, text, size_pt=size_pt, colour=NAVY)
def _callout(doc: Document, title: str, text: str,
             fill: str = "f0f5fa", accent: RGBColor = TEAL) -> None:
    """Add a shaded single-cell box with a bold title and a body line.

    Args:
        doc: Document that receives the box.
        title: Bold 11pt title line, coloured with ``accent``.
        text: 10pt NAVY body text placed in a second paragraph.
        fill: Hex background colour passed to ``_shading``.
        accent: Colour of the title run.
    """
    box = doc.add_table(rows=1, cols=1)
    box.alignment = WD_TABLE_ALIGNMENT.LEFT
    box.style = "Table Grid"

    panel = box.cell(0, 0)
    _shading(panel, fill)
    panel.text = ""
    # The cell's existing first paragraph carries the title; the body gets
    # a paragraph of its own.
    _run(panel.paragraphs[0], title, bold=True, size_pt=11, colour=accent)
    _run(panel.add_paragraph(), text, size_pt=10, colour=NAVY)
def _metric_cards(doc: Document, cards: list[tuple[str, str]],
                  cols: int = 4, fill: str = "f0f5fa") -> None:
    """Lay out ``(title, value)`` pairs as a grid of metric cards.

    Nothing is added when ``cards`` is empty. The grid has ``cols`` columns
    and as many rows as needed; cells are filled row by row, and slots
    beyond the last card are shaded but left empty.

    Args:
        doc: Document that receives the grid.
        cards: ``(title, value)`` pairs; the title is rendered small and
            grey, the value large, bold and NAVY, both centred.
        cols: Number of cards per row.
        fill: Hex background for even-numbered slots; odd slots are white.
    """
    if not cards:
        return

    row_count = (len(cards) + cols - 1) // cols
    grid = doc.add_table(rows=row_count, cols=cols)
    grid.alignment = WD_TABLE_ALIGNMENT.LEFT
    grid.style = "Table Grid"

    for slot in range(row_count * cols):
        r, c = divmod(slot, cols)
        cell = grid.cell(r, c)
        cell.text = ""
        _shading(cell, "ffffff" if slot % 2 else fill)
        if slot >= len(cards):
            continue  # padding slot: shaded only

        title, value = cards[slot]
        caption = cell.paragraphs[0]
        _run(caption, title, bold=False, size_pt=8, colour=GREY)
        caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
        figure = cell.add_paragraph()
        _run(figure, value, bold=True, size_pt=16, colour=NAVY)
        figure.alignment = WD_ALIGN_PARAGRAPH.CENTER
def _add_table(
    doc: Document,
    headers: list[str],
    rows: list[list[str]],
    col_widths: list[float],  # inches
    header_fill: str = "0b3254",
    alt_fill: str = "f0f5fa",
) -> None:
    """Add a brand-styled table with a Deep Blue header row.

    Args:
        doc: Document that receives the table.
        headers: Column titles; their count fixes the number of columns.
        rows: Data rows. Values are rendered with ``str()``; ``None`` becomes
            an empty string. A row shorter than ``headers`` leaves its
            remaining cells untouched; a longer row raises ``IndexError``.
        col_widths: Column widths in inches, applied to every row. Extra
            widths beyond the number of columns are ignored.
        header_fill: Hex background colour of the header row.
        alt_fill: Hex background of even-indexed data rows; odd rows are white.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"

    # python-docx builds the cell sequence anew on each ``row.cells`` access,
    # so fetch it once per row instead of once per cell.
    cells_by_row = [table_row.cells for table_row in table.rows]

    # Header row
    for cell, title in zip(cells_by_row[0], headers):
        cell.text = ""
        run = cell.paragraphs[0].add_run(title)
        run.bold = True
        run.font.size = Pt(9)
        run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        run.font.name = "Source Sans Pro"
        _shading(cell, header_fill)

    # Data rows (table row 0 is the header, hence the +1 offset)
    for ri, row in enumerate(rows):
        row_cells = cells_by_row[ri + 1]
        bg = alt_fill if ri % 2 == 0 else "ffffff"
        for ci, val in enumerate(row):
            cell = row_cells[ci]
            cell.text = ""
            run = cell.paragraphs[0].add_run(str(val) if val is not None else "")
            run.font.size = Pt(9)
            run.font.name = "Source Sans Pro"
            _shading(cell, bg)

    # Column widths: set on every cell of a column; zip() stops at the
    # shorter of the row's cells and the supplied widths.
    for row_cells in cells_by_row:
        for cell, width in zip(row_cells, col_widths):
            cell.width = Inches(width)
def _add_chart(doc: Document, charts: dict[str, str], key: str,
               width_in: float = 5.5) -> None:
    """Insert the chart image registered under ``key``, or a placeholder.

    Args:
        doc: Document that receives the picture or placeholder.
        charts: Mapping of chart key to image file path.
        key: Chart to insert.
        width_in: Picture width in inches.
    """
    image_path = charts.get(key)
    if not image_path or not os.path.exists(image_path):
        # No entry, empty path, or file missing on disk: leave a visible note.
        _para(doc, f"[Chart '{key}' not available]", colour=GREY, size_pt=9)
        return
    doc.add_picture(image_path, width=Inches(width_in))
def _spacer(doc: Document) -> None:
    """Add an empty paragraph, used as vertical spacing between elements."""
    doc.add_paragraph("")
def _page_break(doc: Document) -> None:
    """Add a page break to the document via ``doc.add_page_break``."""
    doc.add_page_break()
# ─────────────────────────────────────────────────────────────────────────────
# Document bootstrap
# ─────────────────────────────────────────────────────────────────────────────
def _bootstrap_styles(doc: Document) -> None:
    """Apply brand fonts, heading styles and page margins to ``doc``.

    Sets the "Normal" style to 11pt Source Sans Pro, restyles
    "Heading 1" to "Heading 3" (bold, sized and coloured per level), and
    sets the margins of the document's first section.
    """
    body_font = doc.styles["Normal"].font
    body_font.name = "Source Sans Pro"
    body_font.size = Pt(11)

    # heading level -> (point size, colour)
    heading_specs = {1: (16, NAVY), 2: (13, TEAL), 3: (11, NAVY)}
    for level, (points, rgb) in heading_specs.items():
        heading_font = doc.styles[f"Heading {level}"].font
        heading_font.name = "Source Sans Pro"
        heading_font.size = Pt(points)
        heading_font.color.rgb = rgb
        heading_font.bold = True

    page = doc.sections[0]
    page.top_margin = Cm(1.5)
    page.bottom_margin = Cm(1.4)
    page.left_margin = Cm(1.6)
    page.right_margin = Cm(1.6)
# ─────────────────────────────────────────────────────────────────────────────
# Title page
# ─────────────────────────────────────────────────────────────────────────────
def _title_page(doc: Document, results: AnalysisResults) -> None:
    """Render the cover page: title banner, scope callout and coverage cards.

    Date ranges are read from ``results.data_quality`` under the ``"events"``
    and ``"safety_energy"`` keys (``date_from`` / ``date_to``); any missing
    value is shown as "N/A". The page ends with a page break.

    Args:
        doc: Document being built.
        results: Analysis results providing ``data_quality``.
    """
    dq = results.data_quality
    events_dq = dq.get("events", {})
    energy_dq = dq.get("safety_energy", {})
    ev_from = events_dq.get("date_from", "N/A")
    ev_to = events_dq.get("date_to", "N/A")
    se_from = energy_dq.get("date_from", "N/A")
    se_to = energy_dq.get("date_to", "N/A")

    # Deep Blue banner holding the report title and subtitle.
    banner = doc.add_table(rows=1, cols=1)
    banner.style = "Table Grid"
    cell = banner.cell(0, 0)
    _shading(cell, "0b3254")
    cell.text = ""
    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _run(p, "SHEQ Safety Performance Report", bold=True, size_pt=26,
         colour=RGBColor(0xFF, 0xFF, 0xFF))
    p2 = cell.add_paragraph()
    _run(p2, "Safety Energy, event risk, and leadership focus areas", size_pt=13,
         colour=RGBColor(0xD7, 0xF2, 0xFF))
    _spacer(doc)

    # The two dates of each window are joined with "to" (as in the coverage
    # cards below); previously they were separated by a bare space.
    _callout(
        doc,
        "Report Scope",
        f"Events window: {ev_from} to {ev_to}. Leading activity window: {se_from} to {se_to}. "
        "Built for senior leaders as a concise decision-support pack rather than a compliance summary.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)
    _metric_cards(doc, [
        ("Generated", datetime.now().strftime("%d %b %Y")),
        ("Events Coverage", f"{ev_from} to {ev_to}"),
        ("Safety Energy Coverage", f"{se_from} to {se_to}"),
        ("Audience", "Executive / Board"),
    ], cols=2)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Executive Summary
# ─────────────────────────────────────────────────────────────────────────────
def _section_executive_summary(doc: Document, results: AnalysisResults) -> None:
    """Write section 1: headline metrics, key messages, charts and actions.

    Reads ``events_summary``, ``leading_summary``, ``trends``,
    ``recommendations`` and ``charts`` from ``results``. At most four
    executive-summary bullets and five recommended actions are shown.
    """
    _heading(doc, "1. Executive Summary", 1)
    events = results.events_summary
    leading = results.leading_summary
    trend_data = results.trends
    actions = results.recommendations
    activity_totals = leading.get("totals", {})

    _callout(
        doc,
        "Leadership Readout",
        "This summary highlights where event risk is concentrated, where leading activities appear strongest or weakest, "
        "and what senior leaders should prioritise next.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Headline numbers, four cards per row.
    serious = events.get("serious_count", 0)
    per_month = events.get("events_per_month", 0)
    motor_count = events.get("motor_vehicle", {}).get("count", 0)
    ccc_quality = (
        trend_data.get("activity_insights", {})
        .get("CCC", {})
        .get("avg_quality", 0)
    )
    headline_cards = [
        ("Total Events", str(events.get("total", 0))),
        ("Moderate+ Events", f"{serious}"),
        ("Events / Month", f"{per_month:.1f}"),
        ("Motor Vehicle Events", str(motor_count)),
        ("LLCs", str(activity_totals.get("LLC", 0))),
        ("CCCs", str(activity_totals.get("CCC", 0))),
        ("OCCs", str(activity_totals.get("OCC", 0))),
        ("CCC Avg Quality", f"{ccc_quality:.1f}"),
    ]
    _metric_cards(doc, headline_cards, cols=4)
    _spacer(doc)

    _heading(doc, "1.1 What Leaders Should Know", 2)
    for message in trend_data.get("executive_summary", [])[:4]:
        _bullet(doc, message)
    _spacer(doc)

    _heading(doc, "1.2 Board Visual Snapshot", 2)
    for chart_key in ("quality_mix", "project_quadrant"):
        _add_chart(doc, results.charts, chart_key, 6.0)
        _spacer(doc)

    _heading(doc, "1.3 Priority Actions", 2)
    for action in actions[:5]:
        _callout(doc, "Recommended Action", action, fill="f7fbff", accent=NAVY)
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Data Quality
# ─────────────────────────────────────────────────────────────────────────────
def _section_data_quality(doc: Document, results: AnalysisResults) -> None:
    """Write section 2: per-source row counts, date coverage and null counts.

    One Field/Value table is produced for each of the "events",
    "safety_energy" and "llc" entries of ``results.data_quality``; missing
    values are shown as "N/A". The Safety Energy source additionally gets
    an activity-type breakdown table when ``type_breakdown`` is present.
    """
    _heading(doc, "2. Data Quality and Coverage", 1)
    quality = results.data_quality
    _para(doc,
          "This section summarises the completeness and date coverage of each data source. "
          "Any gaps identified here may affect the reliability of subsequent analysis sections.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # Fields reported for every source, followed by source-specific null counts.
    coverage_fields = [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to")]
    source_specs = [
        ("Events", "events", [
            ("Null — Event Type", "null_event_type"),
            ("Null — Consequence", "null_consequence"),
            ("Null — Business Unit", "null_business_unit"),
            ("Null — Root Cause", "null_root_cause"),
        ]),
        ("Safety Energy", "safety_energy", [
            ("Null — Leader", "null_leader"),
            ("Null — Business Unit", "null_bu"),
        ]),
        ("LLC Data", "llc", [
            ("Null — Topic", "null_topic"),
            ("Null — Leader", "null_leader"),
        ]),
    ]

    for title, source_key, null_fields in source_specs:
        stats = quality.get(source_key, {})
        _heading(doc, title, 2)
        field_rows = [
            [field_label, str(stats.get(stat_key, "N/A"))]
            for field_label, stat_key in coverage_fields + null_fields
        ]
        _add_table(doc, ["Field", "Value"], field_rows, [3.0, 3.5])
        _spacer(doc)

        # Activity type breakdown for Safety Energy
        if title != "Safety Energy":
            continue
        type_counts = stats.get("type_breakdown", {})
        if type_counts:
            type_rows = [[atype, str(n)] for atype, n in sorted(type_counts.items())]
            _add_table(doc, ["Activity Type", "Count"], type_rows, [3.0, 3.5])
            _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Events Analysis
# ─────────────────────────────────────────────────────────────────────────────
def _section_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 3: event trends, breakdowns, hotspots and motor vehicle data.

    Reads ``results.events_summary`` and ``results.charts``. Sub-sections
    3.4 to 3.8 are emitted only when their source data is non-empty; their
    numbers are fixed, so a skipped sub-section leaves a gap in numbering.
    """

    def count_rows(counts):
        """Turn a ``{label: count}`` mapping into two-column table rows."""
        return [[label, str(n)] for label, n in counts.items()]

    _heading(doc, "3. Events Analysis", 1)
    events = results.events_summary
    chart_paths = results.charts
    _callout(
        doc,
        "Event Story",
        "This section focuses on where event burden is building, where serious outcomes are concentrated, and what special risk signals are visible in timing and motor vehicle data.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # Monthly trend chart
    _heading(doc, "3.1 Monthly Trend", 2)
    _add_chart(doc, chart_paths, "events_monthly", 6.0)
    _spacer(doc)

    # Consequence chart
    _heading(doc, "3.2 Consequence Severity", 2)
    _add_chart(doc, chart_paths, "consequence", 5.5)
    _spacer(doc)

    # Event type table; the denominator is floored at 1 to avoid dividing by zero.
    _heading(doc, "3.3 Event Type Breakdown", 2)
    type_counts = events.get("event_type_counts", {})
    denominator = max(events.get("total", 1), 1)
    type_rows = [
        [etype, str(n), f"{n/denominator*100:.1f}%"]
        for etype, n in type_counts.items()
    ]
    _add_table(doc, ["Event Type", "Count", "%"], type_rows, [3.0, 1.2, 1.0])
    _spacer(doc)

    # CRP table
    crp_counts = events.get("crp_counts", {})
    if crp_counts:
        _heading(doc, "3.4 Critical Risk Protocols Involved", 2)
        _add_table(doc, ["CRP", "Count"], count_rows(crp_counts), [4.0, 1.5])
        _spacer(doc)

    # Root cause table
    root_causes = events.get("root_cause_counts", {})
    if root_causes:
        _heading(doc, "3.5 Root Cause Categories", 2)
        _add_table(doc, ["Root Cause", "Count"], count_rows(root_causes), [4.0, 1.5])
        _spacer(doc)

    # Serious-event hotspots by project and by location
    hot_projects = events.get("serious_projects", {})
    hot_locations = events.get("serious_locations", {})
    if hot_projects or hot_locations:
        _heading(doc, "3.6 Serious Event Hotspots", 2)
        _para(doc,
              "These are the projects and locations with the highest counts of moderate, major, or substantial events in the analysis window.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chart_paths, "serious_hotspots", 5.8)
        _spacer(doc)
        for column_title, hotspot_counts in (("Project", hot_projects),
                                             ("Location", hot_locations)):
            if hotspot_counts:
                _add_table(doc, [column_title, "Serious Events"],
                           count_rows(hotspot_counts), [4.0, 1.5])
                _spacer(doc)

    # Serious-event timing
    timing = events.get("serious_time_buckets", {})
    if timing:
        _heading(doc, "3.7 Serious Event Timing", 2)
        coverage_pct = events.get("serious_time_coverage_pct", 0)
        _para(doc,
              f"Time-of-day information was available for {coverage_pct:.1f}% of serious events.",
              size_pt=11, colour=NAVY)
        _add_table(doc, ["Time of Day", "Serious Events"], count_rows(timing), [3.5, 1.5])
        _spacer(doc)

    # Motor vehicle events
    motor = events.get("motor_vehicle", {})
    if motor.get("count", 0) > 0:
        _heading(doc, "3.8 Motor Vehicle Events", 2)
        share_pct = motor.get("pct_total", 0)
        serious_rate = motor.get("serious_pct_within_mve", 0)
        summary_rows = [
            ["Total Motor Vehicle Events", str(motor.get("count", 0))],
            ["Share of All Events", f"{share_pct:.1f}%"],
            ["Moderate+ MV Events", str(motor.get("serious_count", 0))],
            ["Serious Rate Within MV Events", f"{serious_rate:.1f}%"],
        ]
        _add_table(doc, ["Metric", "Value"], summary_rows, [3.5, 2.0])
        _spacer(doc)

        if motor.get("top_projects"):
            _add_table(doc, ["Project", "MV Events"],
                       count_rows(motor.get("top_projects", {})), [4.0, 1.5])
            _spacer(doc)

        # Up to four leading patterns per category, in the order supplied.
        pattern_rows = []
        for category, source_key in (("Road Type", "road_types"),
                                     ("Road Condition", "conditions"),
                                     ("Vehicle Type", "vehicle_types")):
            leading_patterns = list(motor.get(source_key, {}).items())[:4]
            pattern_rows.extend(
                [category, pattern, str(n)] for pattern, n in leading_patterns
            )
        if pattern_rows:
            _add_table(doc, ["Category", "Pattern", "Count"], pattern_rows, [1.5, 3.0, 1.0])
            _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Leading Activity Overview
# ─────────────────────────────────────────────────────────────────────────────
def _section_leading_overview(doc: Document, results: AnalysisResults) -> None:
    """Write section 4: Safety Energy leading-activity overview.

    Covers activity mix and totals, monthly trend, business-unit breakdown,
    LLC topics, CRP focus, most active leaders, the rolling two-year quality
    view, input depth, and per-type value signals. Data is read from
    ``results.leading_summary``, ``results.trends`` and ``results.charts``.
    Tables and optional sub-sections are skipped when their data is empty.

    Args:
        doc: Document being built.
        results: Analysis results providing the attributes named above.
    """
    _heading(doc, "4. Safety Energy — Leading Activity Overview", 1)
    lead = results.leading_summary
    trends = results.trends
    chts = results.charts
    _callout(
        doc,
        "Leading Activity Readout",
        "Safety Energy combines LLC, CCC, and OCC activity. The deeper two-year review is designed to show not just volume, "
        "but whether the records look rich, preventive, and useful for leadership learning.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Activity mix donut
    _heading(doc, "4.1 Activity Type Mix", 2)
    _add_chart(doc, chts, "activity_mix", 4.5)
    _spacer(doc)

    # Totals table: only activity types with a positive count get a row.
    totals = lead.get("totals", {})
    avg_at_risk = lead.get("avg_at_risk", {})
    if totals:
        rows = [
            [atype,
             str(totals.get(atype, 0)),
             f"{avg_at_risk.get(atype, 0):.2f}"]
            for atype in ["LLC", "CCC", "OCC"]
            if totals.get(atype, 0) > 0
        ]
        _add_table(
            doc,
            ["Activity Type", "Total Count", "Avg At-Risk Aspects per Activity"],
            rows,
            [2.5, 1.5, 3.0],
        )
        _spacer(doc)

    # Monthly trend chart
    _heading(doc, "4.2 Monthly Activity Trend", 2)
    _para(doc, f"Overall trend: {lead.get('activity_trend', 'N/A')}", size_pt=11, colour=GREY)
    _add_chart(doc, chts, "leading_monthly", 6.0)
    _spacer(doc)

    # BU breakdown: one row per business unit seen under any activity type.
    _heading(doc, "4.3 Activity by Business Unit", 2)
    bu_by_type = lead.get("bu_by_type", {})
    all_bus: list[str] = sorted({
        bu for per_type in bu_by_type.values() for bu in per_type
    })
    if all_bus:
        rows = [
            [bu] + [str(bu_by_type.get(at, {}).get(bu, 0)) for at in ["LLC", "CCC", "OCC"]]
            for bu in all_bus
        ]
        _add_table(doc,
                   ["Business Unit", "LLC", "CCC", "OCC"],
                   rows,
                   [2.8, 1.0, 1.0, 1.0])
        _spacer(doc)

    # LLC topic breakdown (from LLC_Data)
    _heading(doc, "4.4 LLC Conversation Topics", 2)
    _para(doc,
          "The following topics were most frequently recorded in Leader Learning Conversations. "
          "Topic coverage indicates where leaders are directing their field conversations.",
          size_pt=11, colour=NAVY)
    _add_chart(doc, chts, "llc_topics", 5.5)
    _spacer(doc)
    top_topics = lead.get("top_topics", {})
    if top_topics:
        rows = [[k, str(v)] for k, v in list(top_topics.items())[:12]]
        _add_table(doc, ["Topic", "Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # CRP focus
    _heading(doc, "4.5 CRP Focus Areas in LLCs", 2)
    _add_chart(doc, chts, "crp_focus", 5.5)
    _spacer(doc)

    # Top leaders
    _heading(doc, "4.6 Most Active Leaders (LLC)", 2)
    _add_chart(doc, chts, "top_leaders", 5.5)
    _spacer(doc)
    top_leaders = lead.get("top_leaders", {})
    if top_leaders:
        rows = [[leader, str(c)] for leader, c in list(top_leaders.items())[:12]]
        _add_table(doc, ["Leader", "LLC Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # Two-year trend and quality view
    _heading(doc, "4.7 Rolling Two-Year Trend and Quality View", 2)
    _para(doc,
          f"This view uses a rolling two-year Safety Energy window from "
          f"{trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}. "
          "It is designed to answer not just whether activities were completed, but whether "
          "the records suggest meaningful learning, risk recognition, and follow-up quality.",
          size_pt=11, colour=NAVY)
    _para(doc, trends.get("proxy_note", ""), size_pt=10, colour=GREY)
    _add_chart(doc, chts, "quality_trend", 6.0)
    _spacer(doc)
    _add_chart(doc, chts, "quality_mix", 6.0)
    _spacer(doc)
    quality_rows = trends.get("quality_by_type", [])
    if quality_rows:
        rows = [
            [
                r.get("activity_type", ""),
                str(r.get("count", 0)),
                f"{r.get('avg_quality', 0):.1f}",
                f"{r.get('avg_input_depth', 0):.1f}",
                f"{r.get('meaningful_pct', 0):.1f}%",
                f"{r.get('high_value_pct', 0):.1f}%",
                f"{r.get('shallow_pct', 0):.1f}%",
                f"{r.get('follow_up_pct', 0):.1f}%",
            ]
            for r in quality_rows
        ]
        _add_table(
            doc,
            ["Type", "Count", "Avg Quality", "Avg Input Depth", "Meaningful", "High Value", "Shallow", "Follow-up"],
            rows,
            [0.8, 0.8, 0.9, 1.0, 0.9, 0.9, 0.9, 0.9],
        )
        _spacer(doc)

    # Input depth: shown only when a correlation value was computed.
    input_depth = trends.get("input_depth", {})
    if input_depth.get("correlation") is not None:
        _heading(doc, "4.8 Input Depth as a Supporting Quality Metric", 2)
        _para(doc,
              f"Across the two-year Safety Energy window, input depth and quality score are correlated at r = {input_depth.get('correlation'):.2f}. "
              f"{input_depth.get('note', '')}",
              size_pt=11, colour=NAVY)
        by_band = input_depth.get("by_band", [])
        if by_band:
            rows = [
                [
                    r.get("band", ""),
                    str(r.get("count", 0)),
                    f"{r.get('avg_input_depth', 0):.1f}",
                    f"{r.get('avg_quality', 0):.1f}",
                    f"{r.get('meaningful_pct', 0):.1f}%",
                    f"{r.get('high_value_pct', 0):.1f}%",
                    f"{r.get('shallow_pct', 0):.1f}%",
                ]
                for r in by_band
            ]
            _add_table(
                doc,
                ["Band", "Count", "Avg Input Depth", "Avg Quality", "Meaningful", "High Value", "Shallow"],
                rows,
                [0.9, 0.8, 1.0, 0.9, 0.9, 0.9, 0.9],
            )
            _spacer(doc)

    top_themes = trends.get("top_themes", {})
    if top_themes:
        _para(doc, "Most common recurring themes in Safety Energy narratives:", bold=True, size_pt=11, colour=NAVY)
        for theme, count in list(top_themes.items())[:5]:
            _bullet(doc, f"{theme}: {count} mentions")
        _spacer(doc)

    # Heading previously read "OCCC"; the activity type is "OCC" everywhere
    # else in this report.
    _heading(doc, "4.9 CCC / OCC / LLC Value Signals", 2)
    activity_insights = trends.get("activity_insights", {})
    for atype in ["CCC", "OCC", "LLC"]:
        insight = activity_insights.get(atype, {})
        if not insight:
            continue
        _para(
            doc,
            f"{atype}: average quality {insight.get('avg_quality', 0):.1f}/100, "
            f"average input depth {insight.get('avg_input_depth', 0):.1f}/100, "
            f"{insight.get('preventive_pct', 0):.1f}% preventive signal, "
            f"{insight.get('reactive_pct', 0):.1f}% reactive signal, "
            f"{insight.get('repetitive_pct', 0):.1f}% repetitive signal, "
            f"{insight.get('shallow_pct', 0):.1f}% shallow.",
            size_pt=11,
            colour=NAVY,
        )
        depth = insight.get("input_depth", {})
        if depth.get("correlation") is not None:
            _para(doc,
                  f"For {atype}, input depth vs quality correlation is r = {depth.get('correlation'):.2f}.",
                  size_pt=10, colour=GREY)
        top_modules = insight.get("top_modules", {})
        if top_modules:
            _para(doc, f"Top {atype} focus areas:", bold=True, size_pt=10, colour=GREY)
            for label, count in list(top_modules.items())[:4]:
                _bullet(doc, f"{label}: {count}")
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Effectiveness of Leading Activities
# ─────────────────────────────────────────────────────────────────────────────
def _section_effectiveness(doc: Document, results: AnalysisResults) -> None:
    """Write section 5: leading-activity volumes set against event outcomes.

    Reads ``results.effectiveness`` and ``results.charts``. The business
    unit table and the two pattern sub-sections (5.4, 5.5) appear only when
    their source lists are non-empty.
    """
    _heading(doc, "5. Effectiveness of Leading Activities", 1)
    effectiveness = results.effectiveness
    chart_paths = results.charts
    _para(doc,
          "This section examines whether leading activity patterns appear associated with "
          "event outcomes at a business unit and portfolio level. All findings are associative "
          "only — correlation does not imply causation.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # Monthly overlay chart
    _heading(doc, "5.1 Monthly Activities vs Events Overlay", 2)
    _add_chart(doc, chart_paths, "overlay", 6.0)
    _spacer(doc)

    # Correlation note
    _heading(doc, "5.2 Statistical Association", 2)
    _para(doc, effectiveness.get("corr_note", "N/A"), size_pt=11, colour=NAVY)
    _spacer(doc)

    # BU comparison chart
    _heading(doc, "5.3 Activities vs Events by Business Unit", 2)
    _add_chart(doc, chart_paths, "bu_comparison", 5.5)
    _spacer(doc)

    # BU table
    unit_records = effectiveness.get("bu_table", [])
    if unit_records:
        unit_rows = []
        for record in unit_records:
            unit_rows.append([
                record.get("business_unit", ""),
                str(record.get("activities", 0)),
                str(record.get("events", 0)),
            ])
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   unit_rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # Pattern commentary
    reactive_units = effectiveness.get("high_activity_high_events", [])
    preventive_units = effectiveness.get("high_activity_low_events", [])
    if reactive_units:
        _heading(doc, "5.4 High Activity / High Events — Pattern of Interest", 2)
        _para(doc,
              "The following Business Units recorded both above-median leading-activity volumes "
              "and above-median event counts. This may indicate reactive activity patterns where "
              "engagement is increasing in response to events rather than preventing them. "
              "Further investigation is recommended.",
              size_pt=11, colour=NAVY)
        for unit in reactive_units:
            _bullet(doc, unit)
        _spacer(doc)
    if preventive_units:
        _heading(doc, "5.5 High Activity / Low Events — Positive Signal", 2)
        _para(doc,
              "The following Business Units recorded above-median leading-activity volumes "
              "and below-median event counts. This pattern is consistent with leading activities "
              "having a preventive effect, though this cannot be confirmed from available data.",
              size_pt=11, colour=NAVY)
        for unit in preventive_units:
            _bullet(doc, unit)
        _spacer(doc)

    # Closing caveat supplied by the analysis (may be empty).
    _para(doc, effectiveness.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 6. At-Risk Behaviours
# ─────────────────────────────────────────────────────────────────────────────
def _section_at_risk(doc: Document, results: AnalysisResults) -> None:
    """Write section 6: at-risk behaviour themes and LLC/event alignment.

    Reads ``results.at_risk`` and ``results.charts``. The combined-theme
    table, the LLC-versus-event comparison table and the alignment-gap list
    are each emitted only when their data is non-empty.
    """
    _heading(doc, "6. At-Risk Behaviours", 1)
    at_risk = results.at_risk
    chart_paths = results.charts
    _para(doc,
          "At-risk behaviour themes are identified by analysing free-text fields across all "
          "three data sources (Events descriptions, LLC conversation notes, and Safety Energy "
          "observations) using keyword matching against known risk categories.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    _heading(doc, "6.1 Combined Theme Frequency", 2)
    _add_chart(doc, chart_paths, "at_risk_themes", 5.5)
    _spacer(doc)
    combined_counts = at_risk.get("combined_themes", {})
    if combined_counts:
        combined_rows = [[theme, str(weight)] for theme, weight in combined_counts.items()]
        _add_table(
            doc,
            ["Risk Theme", "Weighted Frequency"],
            combined_rows,
            [3.5, 2.0],
        )
        _spacer(doc)

    # LLC theme focus vs event themes: one row per theme seen in either source.
    _heading(doc, "6.2 LLC Conversation Topics vs Event Themes", 2)
    llc_counts = at_risk.get("llc_themes", {})
    event_counts = at_risk.get("event_themes", {})
    if llc_counts or event_counts:
        comparison_rows = []
        for theme in sorted(set(llc_counts) | set(event_counts)):
            comparison_rows.append(
                [theme, str(llc_counts.get(theme, 0)), str(event_counts.get(theme, 0))]
            )
        _add_table(doc,
                   ["Risk Theme", "LLC Mentions", "Event Mentions"],
                   comparison_rows,
                   [3.0, 1.5, 1.5])
        _spacer(doc)

    # Alignment gap
    gap_themes = at_risk.get("gap_themes", [])
    if gap_themes:
        _heading(doc, "6.3 Topic Alignment Gaps", 2)
        _para(doc,
              "The following risk themes appear among the top event themes but are under-represented "
              "in LLC conversation topics. This may indicate a gap between where safety conversations "
              "are focused and where actual events are occurring.",
              size_pt=11, colour=NAVY)
        for gap_theme in gap_themes:
            _bullet(doc, gap_theme)
        _spacer(doc)

    # Closing caveat supplied by the analysis (may be empty).
    _para(doc, at_risk.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 7. Safety Energy ↔ Events Relationship
# ─────────────────────────────────────────────────────────────────────────────
def _performance_signal_rows(records: list[dict], name_key: str) -> list[list[str]]:
    """Format project/location comparison records as five-column table rows."""
    rows = []
    for rec in records:
        ratio = rec.get("activity_event_ratio", "")
        rows.append([
            str(rec.get(name_key, "")),
            str(rec.get("activities", 0)),
            str(rec.get("events", 0)),
            str(rec.get("serious_events", 0)),
            # A missing or None ratio is rendered as an empty cell.
            str(ratio if ratio is not None else ""),
        ])
    return rows


def _performance_signal_tables(doc: Document, comparison: dict, name_key: str,
                               column_title: str, watch_caption: str) -> None:
    """Emit the "best" table and the captioned "watch" table of a comparison."""
    headers = [column_title, "Activities", "Events", "Serious", "Activity:Event"]
    widths = [2.8, 1.0, 0.9, 0.9, 1.3]
    if comparison.get("best"):
        _add_table(doc, headers,
                   _performance_signal_rows(comparison.get("best", []), name_key),
                   widths)
        _spacer(doc)
    if comparison.get("watch"):
        _para(doc, watch_caption, bold=True, size_pt=11, colour=NAVY)
        # The watch list is capped at six entries.
        _add_table(doc, headers,
                   _performance_signal_rows(comparison.get("watch", [])[:6], name_key),
                   widths)
        _spacer(doc)


def _section_se_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 7: relationship between Safety Energy and events.

    Reads ``results.se_events_rel`` and ``results.charts``. Produces the
    business-unit comparison, project and location performance signals,
    event spike periods, and the topic alignment observation. Sub-sections
    7.2 to 7.4 are emitted only when their data is non-empty.

    Args:
        doc: Document being built.
        results: Analysis results providing ``se_events_rel`` and ``charts``.
    """
    _heading(doc, "7. Relationship Between Safety Energy and Events", 1)
    rel = results.se_events_rel
    chts = results.charts
    _callout(
        doc,
        "Coverage vs Outcome",
        "This section links leading activity coverage to event burden so leaders can see which business units, projects, and locations appear strongest, and which need direct intervention.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # BU comparison table; activity and event counts are coerced to int.
    _heading(doc, "7.1 Business Unit Comparison", 2)
    bu_comp = rel.get("bu_comparison", [])
    if bu_comp:
        rows = []
        for r in bu_comp:
            ratio = r.get("ratio", "")
            rows.append([
                str(r.get("business_unit", "")),
                str(int(r.get("activities", 0))),
                str(int(r.get("events", 0))),
                str(ratio if ratio is not None else ""),
            ])
        _add_table(doc,
                   ["Business Unit", "Activities", "Events", "Activity:Event Ratio"],
                   rows,
                   [2.5, 1.2, 1.0, 2.0])
        _spacer(doc)

    project_comp = rel.get("project_comparison", {})
    if project_comp.get("best") or project_comp.get("watch"):
        _heading(doc, "7.2 Project Performance Signals", 2)
        _para(doc,
              "These project comparisons use recorded Safety Energy activity against recorded events. They are intended as directional signals only, not league tables.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chts, "project_quadrant", 5.8)
        _spacer(doc)
        _performance_signal_tables(doc, project_comp, "project", "Project",
                                   "Projects needing leadership attention:")

    location_comp = rel.get("location_comparison", {})
    if location_comp.get("best") or location_comp.get("watch"):
        _heading(doc, "7.3 Location Performance Signals", 2)
        _performance_signal_tables(doc, location_comp, "location", "Location",
                                   "Locations needing leadership attention:")

    # Spike months
    spikes = rel.get("spike_months", [])
    if spikes:
        _heading(doc, "7.4 Event Spike Periods", 2)
        _para(doc,
              "The following periods recorded above-average event counts coinciding with "
              "below-average leading-activity volumes. These periods may warrant retrospective "
              "review.",
              size_pt=11, colour=NAVY)
        for m in spikes:
            _bullet(doc, m)
        _spacer(doc)

    # Topic alignment note
    _heading(doc, "7.5 Topic Alignment Observation", 2)
    _para(doc, rel.get("alignment_note", ""), size_pt=11, colour=NAVY)
    llc_top = rel.get("llc_top_topics", [])
    ev_top = rel.get("ev_top_rc", [])
    if llc_top:
        _para(doc, "Top LLC topics:", bold=True, size_pt=11, colour=NAVY)
        for t in llc_top:
            _bullet(doc, str(t))
    if ev_top:
        _para(doc, "Top event root causes:", bold=True, size_pt=11, colour=NAVY)
        for t in ev_top:
            _bullet(doc, str(t))
    _spacer(doc)
    # Closing caveat supplied by the analysis (may be empty).
    _para(doc, rel.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 8. Leader Focus Areas
# ─────────────────────────────────────────────────────────────────────────────
def _section_focus_areas(doc: Document, results: AnalysisResults) -> None:
    """Write section 8: business units and themes needing leadership focus.

    Reads ``results.focus_areas``, ``results.trends`` and ``results.charts``.
    The summary table and sub-sections 8.2 to 8.4 are emitted only when
    their source lists are non-empty.
    """
    _heading(doc, "8. Leader Focus Areas", 1)
    focus = results.focus_areas
    trend_data = results.trends
    chart_paths = results.charts
    _para(doc,
          "This section identifies Business Units and leaders that warrant specific leadership "
          "attention based on activity volumes, event rates, and observed trends.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # BU summary table
    _heading(doc, "8.1 Business Unit Activity and Event Summary", 2)
    unit_records = focus.get("bu_summary", [])
    if unit_records:
        unit_rows = []
        for record in unit_records:
            unit_rows.append([
                str(record.get("business_unit", "")),
                str(record.get("activities", 0)),
                str(record.get("events", 0)),
            ])
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   unit_rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # Declining BUs
    declining_units = focus.get("declining_bus", [])
    if declining_units:
        _heading(doc, "8.2 Declining Activity Units", 2)
        _para(doc,
              "The following Business Units recorded significantly lower leading-activity "
              "volumes in the second half of the analysis period compared to the first half. "
              "Leaders in these units should be engaged to understand and address the decline.",
              size_pt=11, colour=NAVY)
        for unit in declining_units:
            _bullet(doc, unit)
        _spacer(doc)

    # Leadership watchouts (first five only)
    watchouts = trend_data.get("leadership_focus", [])
    if watchouts:
        _heading(doc, "8.3 Leadership Watchouts from Two-Year Safety Energy Trends", 2)
        for watchout in watchouts[:5]:
            _bullet(doc, watchout)
        _spacer(doc)

    # High-volume / low-value hotspots (first eight only)
    hotspots = trend_data.get("high_volume_low_value", [])
    if hotspots:
        _heading(doc, "8.4 High-Volume / Low-Value Hotspots", 2)
        _para(doc,
              "These areas are recording substantial activity volume, but the record quality signals "
              "suggest the activity may be drifting toward compliance-only completion rather than strong learning.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chart_paths, "low_value_units", 5.8)
        _spacer(doc)
        hotspot_rows = []
        for record in hotspots[:8]:
            quality = record.get("avg_quality", 0)
            shallow = record.get("shallow_pct", 0)
            hotspot_rows.append([
                record.get("activity_type", ""),
                str(record.get("business_unit", "")),
                str(record.get("count", 0)),
                f"{quality:.1f}",
                f"{shallow:.1f}%",
            ])
        _add_table(doc,
                   ["Type", "Business Unit", "Count", "Avg Quality", "Shallow"],
                   hotspot_rows,
                   [0.9, 2.8, 0.9, 1.0, 1.0])
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 9. Recommended Actions
# ─────────────────────────────────────────────────────────────────────────────
def _section_recommendations(doc: Document, results: AnalysisResults) -> None:
    """Write section 9 ("Recommended Actions") into *doc*.

    The section opens with an "Action Agenda" callout, then renders at most
    the first ten entries of ``results.recommendations`` as callouts: the
    first three are titled "Immediate Priority" and use the alert accent,
    the remainder are titled "Next Priority". A fixed list of ongoing
    monitoring bullets follows, and the section ends with a page break.
    """
    _heading(doc, "9. Recommended Actions", 1)
    _callout(
        doc,
        "Action Agenda",
        "These actions are generated directly from the event hotspots, leading-activity quality signals, and project/location performance patterns in the report.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Positions are 1-based so the "top three" test reads naturally.
    for position, recommendation in enumerate(results.recommendations[:10], start=1):
        if position <= 3:
            label, fill_hex, accent_colour = "Immediate", "f7fbff", ALERT
        else:
            label, fill_hex, accent_colour = "Next", "ffffff", NAVY
        _callout(
            doc,
            f"{label} Priority",
            recommendation,
            fill=fill_hex,
            accent=accent_colour,
        )
        _spacer(doc)
    _spacer(doc)

    _heading(doc, "Ongoing Monitoring Recommendations", 2)
    monitoring_items = (
        "Track leading-activity volumes monthly by Business Unit against a set target (e.g. "
        "minimum 4 LLCs per leader per month).",
        "Track CCC/OCC/LLC quality monthly using shallow-entry rate, follow-up rate, and average quality score.",
        "Review LLC topic coverage quarterly to ensure alignment with top event root causes.",
        "Re-run this full report monthly or quarterly as new data becomes available.",
        "Supplement quantitative analysis with qualitative review of LLC content quality.",
        "Use the Business Unit activity-to-event ratio table to guide where SHEQ advisor "
        "engagement should be prioritised.",
    )
    for monitoring_item in monitoring_items:
        _bullet(doc, monitoring_item)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 10. Methodology and Caveats
# ─────────────────────────────────────────────────────────────────────────────
def _section_methodology(doc: Document, results: AnalysisResults) -> None:
    """Write section 10 ("Methodology and Caveats") into *doc*.

    Four sub-sections are emitted in order: a data-source table (10.1),
    activity-type definitions (10.2), the analytical approach (10.3) and the
    caveats taken from ``results.caveats`` (10.4). The rolling-window bullet
    in 10.3 is filled from ``results.trends`` (``window_months``,
    ``window_start``, ``window_end``), falling back to 24 / 'N/A' when a key
    is absent. A small grey "Report generated" timestamp closes the section.
    """
    _heading(doc, "10. Methodology and Caveats", 1)
    trends = results.trends

    # ── 10.1 Data sources ────────────────────────────────────────────────
    _heading(doc, "10.1 Data Sources", 2)
    source_rows = [
        [
            "Events.xlsx",
            "Incident and event records exported from the Ventia safety management system. "
            "Covers all event types including injuries, motor vehicle events, close calls, "
            "environmental events, and nonconformances.",
        ],
        [
            "Safety_Energy.xlsx",
            "Combined leading activity export covering all three activity types: Leader Learning "
            "Conversations (LLC), Critical Control Checks (CCC), and Operational Control Checks (OCC). "
            "This is treated as the primary leading indicator data source.",
        ],
        [
            "LLC_Data.xlsx",
            "Supplementary LLC export providing richer free-text data (conversation topics, CRP focus, "
            "at-risk observations). Used primarily for theme and topic analysis. Record counts are "
            "closely aligned with the LLC records in Safety_Energy.xlsx.",
        ],
    ]
    _add_table(doc, ["Source", "Description"], source_rows, [2.0, 4.5])
    _spacer(doc)

    # ── 10.2 Activity type definitions ───────────────────────────────────
    _heading(doc, "10.2 Activity Type Definitions", 2)
    _para(
        doc,
        "Safety Energy is the combined analytical domain. It encompasses three activity types:",
        size_pt=11,
        colour=NAVY,
    )
    definitions = (
        "LLC (Leader Learning Conversation): A structured conversation between a leader and "
        "a worker or work group, focused on safety topics, risk identification, and critical "
        "controls.",
        "CCC (Critical Control Check): A field verification that critical controls for high-risk "
        "activities are in place and effective (e.g. working at height, hazardous energies).",
        "OCC (Operational Control Check): A broader operational inspection or check covering "
        "a range of work-area risk topics.",
        "Note: In some legacy documentation or older exports, the label 'OCC' was used broadly "
        "to cover what is now split into CCC and OCC. The current Safety_Energy.xlsx export "
        "correctly separates these via the ModuleType field. No manual deduplication was required.",
    )
    for definition in definitions:
        _bullet(doc, definition)
    _spacer(doc)

    # ── 10.3 Analytical approach ─────────────────────────────────────────
    _heading(doc, "10.3 Analytical Approach", 2)
    window_months = trends.get('window_months', 24)
    window_start = trends.get('window_start', 'N/A')
    window_end = trends.get('window_end', 'N/A')
    approach_notes = (
        "Monthly trend analysis: Activities and events are aggregated by calendar month. "
        "Trend direction is estimated by comparing recent-period averages against prior-period averages.",
        f"Rolling two-year Safety Energy review: deeper trend and quality analysis uses a {window_months}-month "
        f"window from {window_start} to {window_end}, anchored to the latest Safety Energy record.",
        "Effectiveness analysis: Business unit-level aggregates and overall monthly correlations "
        "are used as proxies for effectiveness. Correlation is computed using Pearson r.",
        "At-risk theme extraction: Free-text fields are scanned using a predefined keyword "
        "dictionary (see config.py). Frequency counts are combined across sources with a "
        "2× weight applied to event-source mentions (lagging signal).",
        "Business Unit focus: BUs are flagged as 'declining' if second-half activity volume "
        "is less than 70% of first-half volume within the analysis window.",
        "Leading-activity quality scoring: records are scored using practical proxies including text richness, specificity, "
        "risk recognition, action/follow-up language, learning evidence, and penalties for generic or duplicated wording.",
    )
    for note in approach_notes:
        _bullet(doc, note)
    _spacer(doc)

    # ── 10.4 Caveats ─────────────────────────────────────────────────────
    _heading(doc, "10.4 Caveats and Limitations", 2)
    for caveat in results.caveats:
        _bullet(doc, caveat)
    _spacer(doc)

    # Footer stamp uses the local wall-clock time at render.
    generated_at = datetime.now().strftime('%d %B %Y at %H:%M')
    _para(doc, f"Report generated: {generated_at}", size_pt=9, colour=GREY)
# ─────────────────────────────────────────────────────────────────────────────
# Main entry point
# ─────────────────────────────────────────────────────────────────────────────
def build_report(results: AnalysisResults, output_dir: str) -> str:
    """
    Build the full SHEQ DOCX report from an AnalysisResults object.

    The output directory is created if it does not exist. Sections are
    rendered in report order (title page, then sections 1-10) and the
    document is saved as ``SHEQ_Safety_Performance_<YYYYMMDD_HHMM>.docx``.
    The timestamp has minute resolution, so two reports written to the same
    directory within the same minute share a filename.

    Parameters
    ----------
    results : output of analysis_engine.run_full_analysis
    output_dir : directory to write the .docx file into (relative or absolute)

    Returns
    -------
    Absolute path to the generated .docx file.
    """
    os.makedirs(output_dir, exist_ok=True)

    doc = Document()
    _bootstrap_styles(doc)
    log.info("Building DOCX report...")

    _title_page(doc, results)
    _section_executive_summary(doc, results)
    _section_data_quality(doc, results)
    _section_events(doc, results)
    _section_leading_overview(doc, results)
    _section_effectiveness(doc, results)
    _section_at_risk(doc, results)
    _section_se_events(doc, results)
    _section_focus_areas(doc, results)
    _section_recommendations(doc, results)
    _section_methodology(doc, results)

    filename = f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.docx"
    # Normalise to an absolute path so the return value honours the documented
    # contract even when the caller passes a relative output_dir.
    output_path = os.path.abspath(os.path.join(output_dir, filename))
    doc.save(output_path)
    log.info("Report saved to %s", output_path)
    return output_path