Files
2026-04-20 15:23:18 +12:00

1158 lines
49 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
report_builder.py — DOCX report generation for the SHEQ Analysis Tool.
Takes an AnalysisResults object from analysis_engine and produces a
fully-formatted DOCX report following the Ventia brand guidelines in
DESIGN.md.
Public API
----------
build_report(results: AnalysisResults, output_dir: str) -> str
Returns the path to the generated .docx file.
"""
from __future__ import annotations
import logging
import os
from datetime import datetime
from typing import Any
import pandas as pd
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Cm, Inches, Pt, RGBColor
from analysis_engine import AnalysisResults
from config import (
CONSEQUENCE_ORDER,
DEEP_BLUE, SKY_BLUE, DARK_GREEN, MUTED,
AMBER, RED,
)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# ── Brand RGBColor objects ────────────────────────────────────────────────────
# Pre-built python-docx colour values passed as the `colour` / `accent`
# arguments of the text helpers below (NAVY is their default text colour).
# NOTE(review): these presumably mirror the DEEP_BLUE / SKY_BLUE / DARK_GREEN /
# MUTED / RED values imported from config — confirm they stay in sync.
NAVY = RGBColor(0x0B, 0x32, 0x54) # Deep Blue
TEAL = RGBColor(0x13, 0xB5, 0xEA) # Sky Blue
GREEN = RGBColor(0x00, 0x6E, 0x47) # Dark Green
GREY = RGBColor(0x64, 0x74, 0x8B) # Muted / slate grey
ALERT = RGBColor(0xDC, 0x26, 0x26) # Red
# ─────────────────────────────────────────────────────────────────────────────
# Low-level DOCX helpers
# ─────────────────────────────────────────────────────────────────────────────
def _shading(cell, hex_color: str) -> None:
    """Fill a table cell's background with *hex_color*.

    Builds a ``w:shd`` element (``w:val="clear"``, ``w:fill=hex_color``) and
    appends it to the cell's ``tcPr`` properties, creating them if absent.
    """
    shading_xml = f'<w:shd {nsdecls("w")} w:fill="{hex_color}" w:val="clear"/>'
    shading_element = parse_xml(shading_xml)
    cell._tc.get_or_add_tcPr().append(shading_element)
def _run(para, text: str, bold: bool = False, size_pt: int = 11,
         colour: RGBColor = NAVY, italic: bool = False) -> None:
    """Append a styled text run to *para*.

    The run is set in "Source Sans Pro" at ``size_pt`` points with the given
    bold/italic flags. ``colour`` may also be passed as a hex string (with or
    without a leading ``#``); it is converted with ``RGBColor.from_string``.
    """
    new_run = para.add_run(text)
    new_run.bold = bold
    new_run.italic = italic
    font = new_run.font
    font.size = Pt(size_pt)
    if isinstance(colour, str):
        hex_digits = colour.replace("#", "").upper()
        colour = RGBColor.from_string(hex_digits)
    font.color.rgb = colour
    font.name = "Source Sans Pro"
def _heading(doc: Document, text: str, level: int) -> None:
    """Add a heading with the given text and level to the document."""
    doc.add_heading(text, level=level)
def _para(doc: Document, text: str = "", bold: bool = False,
          size_pt: int = 11, colour: RGBColor = NAVY) -> None:
    """Add a new paragraph holding a single run styled via ``_run``."""
    paragraph = doc.add_paragraph()
    _run(paragraph, text, bold=bold, size_pt=size_pt, colour=colour)
def _bullet(doc: Document, text: str, size_pt: int = 11) -> None:
    """Add a "List Bullet" paragraph containing *text* in the NAVY colour."""
    bullet_para = doc.add_paragraph(style="List Bullet")
    _run(bullet_para, text, size_pt=size_pt, colour=NAVY)
def _callout(doc: Document, title: str, text: str,
             fill: str = "f0f5fa", accent: RGBColor = TEAL) -> None:
    """Add a shaded one-cell callout box with a bold title and body text.

    The box is a 1x1 "Table Grid" table, left-aligned, filled with ``fill``.
    The title is drawn in ``accent`` at 11 pt; the body in NAVY at 10 pt.
    """
    box = doc.add_table(rows=1, cols=1)
    box.alignment = WD_TABLE_ALIGNMENT.LEFT
    box.style = "Table Grid"

    body_cell = box.cell(0, 0)
    _shading(body_cell, fill)
    body_cell.text = ""

    # The cell's first paragraph carries the title; the body gets a second one.
    title_para = body_cell.paragraphs[0]
    _run(title_para, title, bold=True, size_pt=11, colour=accent)
    body_para = body_cell.add_paragraph()
    _run(body_para, text, size_pt=10, colour=NAVY)
def _metric_cards(doc: Document, cards: list[tuple[str, str]],
                  cols: int = 4, fill: str = "f0f5fa") -> None:
    """Lay out ``(title, value)`` cards in a grid table with ``cols`` columns.

    Does nothing when *cards* is empty. Cells are visited row by row; their
    shading alternates between ``fill`` and white by running cell index, and
    cells beyond the last card are shaded but left empty.
    """
    if not cards:
        return

    n_cards = len(cards)
    n_rows = (n_cards + cols - 1) // cols  # ceiling division
    grid = doc.add_table(rows=n_rows, cols=cols)
    grid.alignment = WD_TABLE_ALIGNMENT.LEFT
    grid.style = "Table Grid"

    for slot in range(n_rows * cols):
        row_idx, col_idx = divmod(slot, cols)
        cell = grid.cell(row_idx, col_idx)
        cell.text = ""
        _shading(cell, "ffffff" if slot % 2 else fill)
        if slot >= n_cards:
            continue  # padding cell in the last row

        title, value = cards[slot]
        title_para = cell.paragraphs[0]
        _run(title_para, title, bold=False, size_pt=8, colour=GREY)
        title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        value_para = cell.add_paragraph()
        _run(value_para, value, bold=True, size_pt=16, colour=NAVY)
        value_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
def _add_table(
    doc: Document,
    headers: list[str],
    rows: list[list[str]],
    col_widths: list[float],  # inches
    header_fill: str = "0b3254",
    alt_fill: str = "f0f5fa",
) -> None:
    """Add a brand-styled table with a Deep Blue header row.

    Args:
        doc: Document the table is appended to.
        headers: Column titles; their count fixes the number of columns.
        rows: Data rows. ``None`` values render as empty strings; everything
            else is rendered with ``str()``. A row with more values than
            there are columns is truncated (with a logged warning).
        col_widths: Column widths in inches, applied left to right; widths
            beyond the last column are ignored.
        header_fill: Hex fill colour of the header row.
        alt_fill: Hex fill colour of even-indexed data rows (odd rows are white).
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"

    # Resolve each row's cells exactly once. Repeating ``table.rows[i].cells[j]``
    # for every cell re-derives the cell list each time, which is slow on
    # larger tables.
    grid = [table_row.cells for table_row in table.rows]

    # Header row
    for cell, header in zip(grid[0], headers):
        cell.text = ""
        run = cell.paragraphs[0].add_run(header)
        run.bold = True
        run.font.size = Pt(9)
        run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        run.font.name = "Source Sans Pro"
        _shading(cell, header_fill)

    # Data rows
    for ri, (cells, row) in enumerate(zip(grid[1:], rows)):
        if len(row) > len(cells):
            # Previously this raised IndexError and aborted the whole report.
            log.warning(
                "Table row %d has %d values for %d columns; extra values are dropped",
                ri, len(row), len(cells),
            )
        bg = alt_fill if ri % 2 == 0 else "ffffff"
        for cell, val in zip(cells, row):
            cell.text = ""
            run = cell.paragraphs[0].add_run(str(val) if val is not None else "")
            run.font.size = Pt(9)
            run.font.name = "Source Sans Pro"
            _shading(cell, bg)

    # Column widths (set on every cell of each column)
    widths = [Inches(w) for w in col_widths]
    for cells in grid:
        for cell, width in zip(cells, widths):
            cell.width = width
def _add_chart(doc: Document, charts: dict[str, str], key: str,
               width_in: float = 5.5) -> None:
    """Insert the chart image registered under *key*, or a placeholder note.

    ``charts`` maps chart keys to image file paths. When the key is missing
    or the file does not exist, a small grey "not available" line is written
    instead of a picture.
    """
    image_path = charts.get(key)
    if not (image_path and os.path.exists(image_path)):
        _para(doc, f"[Chart '{key}' not available]", colour=GREY, size_pt=9)
        return
    doc.add_picture(image_path, width=Inches(width_in))
def _spacer(doc: Document) -> None:
    """Add an empty paragraph as vertical spacing."""
    doc.add_paragraph("")
def _page_break(doc: Document) -> None:
    """Add a page break to the document."""
    doc.add_page_break()
# ─────────────────────────────────────────────────────────────────────────────
# Document bootstrap
# ─────────────────────────────────────────────────────────────────────────────
def _bootstrap_styles(doc: Document) -> None:
    """Set the document's base typography and page margins.

    "Normal" becomes Source Sans Pro 11 pt; "Heading 1"–"Heading 3" become
    bold Source Sans Pro in their own size and colour; the first section's
    margins are set in centimetres.
    """
    body_font = doc.styles["Normal"].font
    body_font.name = "Source Sans Pro"
    body_font.size = Pt(11)

    # heading level -> (point size, colour)
    heading_specs = {1: (16, NAVY), 2: (13, TEAL), 3: (11, NAVY)}
    for level, (size, colour) in heading_specs.items():
        heading_font = doc.styles[f"Heading {level}"].font
        heading_font.name = "Source Sans Pro"
        heading_font.size = Pt(size)
        heading_font.color.rgb = colour
        heading_font.bold = True

    first_section = doc.sections[0]
    first_section.top_margin = Cm(1.5)
    first_section.bottom_margin = Cm(1.4)
    first_section.left_margin = Cm(1.6)
    first_section.right_margin = Cm(1.6)
# ─────────────────────────────────────────────────────────────────────────────
# Title page
# ─────────────────────────────────────────────────────────────────────────────
def _title_page(doc: Document, results: AnalysisResults) -> None:
    """Write the title page: banner, report-scope callout and coverage cards.

    Date ranges are read from ``results.data_quality`` (``events`` and
    ``safety_energy`` entries, keys ``date_from`` / ``date_to``) and fall
    back to "N/A" when absent. The page ends with a page break.
    """
    dq = results.data_quality
    events_dq = dq.get("events", {})
    energy_dq = dq.get("safety_energy", {})
    ev_from = events_dq.get("date_from", "N/A")
    ev_to = events_dq.get("date_to", "N/A")
    se_from = energy_dq.get("date_from", "N/A")
    se_to = energy_dq.get("date_to", "N/A")

    # Full-width Deep Blue banner with title and subtitle.
    banner = doc.add_table(rows=1, cols=1)
    banner.style = "Table Grid"
    cell = banner.cell(0, 0)
    _shading(cell, "0b3254")
    cell.text = ""
    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _run(p, "SHEQ Safety Performance Report", bold=True, size_pt=26, colour=RGBColor(0xFF, 0xFF, 0xFF))
    p2 = cell.add_paragraph()
    _run(p2, "Safety Energy, event risk, and leadership focus areas", size_pt=13, colour=RGBColor(0xD7, 0xF2, 0xFF))
    _spacer(doc)

    # The two dates of each window are joined with "to" (they used to be
    # separated by a bare space), matching the coverage cards below.
    _callout(
        doc,
        "Report Scope",
        f"Events window: {ev_from} to {ev_to}. Leading activity window: {se_from} to {se_to}. "
        "Built for senior leaders as a concise decision-support pack rather than a compliance summary.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)
    _metric_cards(doc, [
        ("Generated", datetime.now().strftime("%d %b %Y")),
        ("Events Coverage", f"{ev_from} to {ev_to}"),
        ("Safety Energy Coverage", f"{se_from} to {se_to}"),
        ("Audience", "Executive / Board"),
    ], cols=2)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Executive Summary
# ─────────────────────────────────────────────────────────────────────────────
def _section_executive_summary(doc: Document, results: AnalysisResults) -> None:
    """Write section 1: headline metric cards, key messages, charts, actions.

    Reads ``events_summary``, ``leading_summary``, ``trends``,
    ``recommendations`` and ``charts`` from *results*. Shows at most four
    executive-summary bullets and five recommended actions, then breaks the page.
    """
    _heading(doc, "1. Executive Summary", 1)
    events = results.events_summary
    leading = results.leading_summary
    trends = results.trends
    recommendations = results.recommendations
    activity_totals = leading.get("totals", {})

    _callout(
        doc,
        "Leadership Readout",
        "This summary highlights where event risk is concentrated, where leading activities appear strongest or weakest, "
        "and what senior leaders should prioritise next.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    motor_count = events.get("motor_vehicle", {}).get("count", 0)
    ccc_quality = trends.get('activity_insights', {}).get('CCC', {}).get('avg_quality', 0)
    headline_cards = [
        ("Total Events", str(events.get("total", 0))),
        ("Moderate+ Events", f"{events.get('serious_count', 0)}"),
        ("Events / Month", f"{events.get('events_per_month', 0):.1f}"),
        ("Motor Vehicle Events", str(motor_count)),
        ("LLCs", str(activity_totals.get("LLC", 0))),
        ("CCCs", str(activity_totals.get("CCC", 0))),
        ("OCCs", str(activity_totals.get("OCC", 0))),
        ("CCC Avg Quality", f"{ccc_quality:.1f}"),
    ]
    _metric_cards(doc, headline_cards, cols=4)
    _spacer(doc)

    _heading(doc, "1.1 What Leaders Should Know", 2)
    key_messages = trends.get("executive_summary", [])[:4]
    for message in key_messages:
        _bullet(doc, message)
    _spacer(doc)

    _heading(doc, "1.2 Board Visual Snapshot", 2)
    for chart_key in ("quality_mix", "project_quadrant"):
        _add_chart(doc, results.charts, chart_key, 6.0)
        _spacer(doc)

    _heading(doc, "1.3 Priority Actions", 2)
    for action in recommendations[:5]:
        _callout(doc, "Recommended Action", action, fill="f7fbff", accent=NAVY)
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Data Quality
# ─────────────────────────────────────────────────────────────────────────────
def _section_data_quality(doc: Document, results: AnalysisResults) -> None:
    """Write section 2: a field/value table per data source.

    For each of the ``events``, ``safety_energy`` and ``llc`` entries of
    ``results.data_quality`` the row count, date range and null counts are
    tabulated ("N/A" for missing keys). Safety Energy additionally gets an
    activity-type count table when ``type_breakdown`` is non-empty.
    """
    _heading(doc, "2. Data Quality and Coverage", 1)
    dq = results.data_quality
    _para(doc,
          "This section summarises the completeness and date coverage of each data source. "
          "Any gaps identified here may affect the reliability of subsequent analysis sections.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # Rows shared by every source, followed by that source's null-count rows.
    coverage_fields = [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to")]
    source_specs = [
        ("Events", "events",
         [("Null — Event Type", "null_event_type"), ("Null — Consequence", "null_consequence"),
          ("Null — Business Unit", "null_business_unit"), ("Null — Root Cause", "null_root_cause")]),
        ("Safety Energy", "safety_energy",
         [("Null — Leader", "null_leader"), ("Null — Business Unit", "null_bu")]),
        ("LLC Data", "llc",
         [("Null — Topic", "null_topic"), ("Null — Leader", "null_leader")]),
    ]

    for label, dq_key, null_fields in source_specs:
        data = dq.get(dq_key, {})
        _heading(doc, label, 2)
        field_rows = [
            [caption, str(data.get(field, "N/A"))]
            for caption, field in coverage_fields + null_fields
        ]
        _add_table(doc, ["Field", "Value"], field_rows, [3.0, 3.5])
        _spacer(doc)

        # Activity type breakdown for Safety Energy
        if label != "Safety Energy":
            continue
        breakdown = data.get("type_breakdown", {})
        if breakdown:
            type_rows = [[atype, str(n)] for atype, n in sorted(breakdown.items())]
            _add_table(doc, ["Activity Type", "Count"], type_rows, [3.0, 3.5])
            _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Events Analysis
# ─────────────────────────────────────────────────────────────────────────────
def _section_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 3: event trends, breakdown tables and risk hotspots.

    Sub-sections 3.1–3.3 are always written. 3.4–3.8 (CRPs, root causes,
    serious-event hotspots, serious-event timing, motor vehicle events) are
    written only when the matching ``results.events_summary`` entry has data.
    """

    def count_rows(mapping):
        # One [label, count] row per mapping entry, in mapping order.
        return [[name, str(n)] for name, n in mapping.items()]

    _heading(doc, "3. Events Analysis", 1)
    events = results.events_summary
    charts = results.charts
    _callout(
        doc,
        "Event Story",
        "This section focuses on where event burden is building, where serious outcomes are concentrated, and what special risk signals are visible in timing and motor vehicle data.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # Monthly trend chart
    _heading(doc, "3.1 Monthly Trend", 2)
    _add_chart(doc, charts, "events_monthly", 6.0)
    _spacer(doc)

    # Consequence chart
    _heading(doc, "3.2 Consequence Severity", 2)
    _add_chart(doc, charts, "consequence", 5.5)
    _spacer(doc)

    # Event type table
    _heading(doc, "3.3 Event Type Breakdown", 2)
    type_counts = events.get("event_type_counts", {})
    denominator = max(events.get("total", 1), 1)  # never divide by zero
    type_rows = [
        [etype, str(n), f"{n/denominator*100:.1f}%"]
        for etype, n in type_counts.items()
    ]
    _add_table(doc, ["Event Type", "Count", "%"], type_rows, [3.0, 1.2, 1.0])
    _spacer(doc)

    # CRP table
    crp_counts = events.get("crp_counts", {})
    if crp_counts:
        _heading(doc, "3.4 Critical Risk Protocols Involved", 2)
        _add_table(doc, ["CRP", "Count"], count_rows(crp_counts), [4.0, 1.5])
        _spacer(doc)

    # Root cause table
    root_causes = events.get("root_cause_counts", {})
    if root_causes:
        _heading(doc, "3.5 Root Cause Categories", 2)
        _add_table(doc, ["Root Cause", "Count"], count_rows(root_causes), [4.0, 1.5])
        _spacer(doc)

    # Serious event hotspots by project and location
    hot_projects = events.get("serious_projects", {})
    hot_locations = events.get("serious_locations", {})
    if hot_projects or hot_locations:
        _heading(doc, "3.6 Serious Event Hotspots", 2)
        _para(doc,
              "These are the projects and locations with the highest counts of moderate, major, or substantial events in the analysis window.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, charts, "serious_hotspots", 5.8)
        _spacer(doc)
        if hot_projects:
            _add_table(doc, ["Project", "Serious Events"], count_rows(hot_projects), [4.0, 1.5])
            _spacer(doc)
        if hot_locations:
            _add_table(doc, ["Location", "Serious Events"], count_rows(hot_locations), [4.0, 1.5])
            _spacer(doc)

    # Serious event timing
    timing = events.get("serious_time_buckets", {})
    if timing:
        _heading(doc, "3.7 Serious Event Timing", 2)
        _para(doc,
              f"Time-of-day information was available for {events.get('serious_time_coverage_pct', 0):.1f}% of serious events.",
              size_pt=11, colour=NAVY)
        _add_table(doc, ["Time of Day", "Serious Events"], count_rows(timing), [3.5, 1.5])
        _spacer(doc)

    # Motor vehicle events
    motor = events.get("motor_vehicle", {})
    if motor.get("count", 0) > 0:
        _heading(doc, "3.8 Motor Vehicle Events", 2)
        metric_rows = [
            ["Total Motor Vehicle Events", str(motor.get("count", 0))],
            ["Share of All Events", f"{motor.get('pct_total', 0):.1f}%"],
            ["Moderate+ MV Events", str(motor.get("serious_count", 0))],
            ["Serious Rate Within MV Events", f"{motor.get('serious_pct_within_mve', 0):.1f}%"],
        ]
        _add_table(doc, ["Metric", "Value"], metric_rows, [3.5, 2.0])
        _spacer(doc)
        if motor.get("top_projects"):
            project_rows = count_rows(motor.get("top_projects", {}))
            _add_table(doc, ["Project", "MV Events"], project_rows, [4.0, 1.5])
            _spacer(doc)

        # Up to four leading patterns from each motor-vehicle category.
        categories = [
            ("Road Type", motor.get("road_types", {})),
            ("Road Condition", motor.get("conditions", {})),
            ("Vehicle Type", motor.get("vehicle_types", {})),
        ]
        pattern_rows = []
        for category, values in categories:
            for pattern, n in list(values.items())[:4]:
                pattern_rows.append([category, pattern, str(n)])
        if pattern_rows:
            _add_table(doc, ["Category", "Pattern", "Count"], pattern_rows, [1.5, 3.0, 1.0])
            _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Leading Activity Overview
# ─────────────────────────────────────────────────────────────────────────────
def _section_leading_overview(doc: Document, results: AnalysisResults) -> None:
    """Write section 4: Safety Energy leading-activity volume and quality.

    Reads ``results.leading_summary`` (totals, BU split, topics, leaders),
    ``results.trends`` (two-year quality view, input depth, themes, per-type
    insights) and ``results.charts``. Tables are written only when their
    source data is non-empty; charts fall back to a placeholder line when
    unavailable. Ends with a page break.
    """
    _heading(doc, "4. Safety Energy — Leading Activity Overview", 1)
    lead = results.leading_summary
    trends = results.trends
    chts = results.charts
    _callout(
        doc,
        "Leading Activity Readout",
        "Safety Energy combines LLC, CCC, and OCC activity. The deeper two-year review is designed to show not just volume, "
        "but whether the records look rich, preventive, and useful for leadership learning.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Activity mix donut
    _heading(doc, "4.1 Activity Type Mix", 2)
    _add_chart(doc, chts, "activity_mix", 4.5)
    _spacer(doc)

    # Totals table (activity types with a zero count are omitted)
    totals = lead.get("totals", {})
    avg_at_risk = lead.get("avg_at_risk", {})
    if totals:
        rows = [
            [atype,
             str(totals.get(atype, 0)),
             f"{avg_at_risk.get(atype, 0):.2f}"]
            for atype in ["LLC", "CCC", "OCC"]
            if totals.get(atype, 0) > 0
        ]
        _add_table(
            doc,
            ["Activity Type", "Total Count", "Avg At-Risk Aspects per Activity"],
            rows,
            [2.5, 1.5, 3.0],
        )
        _spacer(doc)

    # Monthly trend chart
    _heading(doc, "4.2 Monthly Activity Trend", 2)
    _para(doc, f"Overall trend: {lead.get('activity_trend', 'N/A')}", size_pt=11, colour=GREY)
    _add_chart(doc, chts, "leading_monthly", 6.0)
    _spacer(doc)

    # BU breakdown: one row per business unit seen under any activity type
    _heading(doc, "4.3 Activity by Business Unit", 2)
    bu_by_type = lead.get("bu_by_type", {})
    all_bus: list[str] = sorted({
        bu for per_type in bu_by_type.values() for bu in per_type
    })
    if all_bus:
        rows = [
            [bu] + [str(bu_by_type.get(at, {}).get(bu, 0)) for at in ["LLC", "CCC", "OCC"]]
            for bu in all_bus
        ]
        _add_table(doc,
                   ["Business Unit", "LLC", "CCC", "OCC"],
                   rows,
                   [2.8, 1.0, 1.0, 1.0])
        _spacer(doc)

    # LLC topic breakdown (from LLC_Data)
    _heading(doc, "4.4 LLC Conversation Topics", 2)
    _para(doc,
          "The following topics were most frequently recorded in Leader Learning Conversations. "
          "Topic coverage indicates where leaders are directing their field conversations.",
          size_pt=11, colour=NAVY)
    _add_chart(doc, chts, "llc_topics", 5.5)
    _spacer(doc)
    top_topics = lead.get("top_topics", {})
    if top_topics:
        rows = [[k, str(v)] for k, v in list(top_topics.items())[:12]]
        _add_table(doc, ["Topic", "Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # CRP focus
    _heading(doc, "4.5 CRP Focus Areas in LLCs", 2)
    _add_chart(doc, chts, "crp_focus", 5.5)
    _spacer(doc)

    # Top leaders
    _heading(doc, "4.6 Most Active Leaders (LLC)", 2)
    _add_chart(doc, chts, "top_leaders", 5.5)
    _spacer(doc)
    top_leaders = lead.get("top_leaders", {})
    if top_leaders:
        rows = [[leader, str(count)] for leader, count in list(top_leaders.items())[:12]]
        _add_table(doc, ["Leader", "LLC Count"], rows, [4.0, 1.5])
        _spacer(doc)

    # Two-year trend and quality view
    _heading(doc, "4.7 Rolling Two-Year Trend and Quality View", 2)
    _para(doc,
          f"This view uses a rolling two-year Safety Energy window from "
          f"{trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}. "
          "It is designed to answer not just whether activities were completed, but whether "
          "the records suggest meaningful learning, risk recognition, and follow-up quality.",
          size_pt=11, colour=NAVY)
    _para(doc, trends.get("proxy_note", ""), size_pt=10, colour=GREY)
    _add_chart(doc, chts, "quality_trend", 6.0)
    _spacer(doc)
    _add_chart(doc, chts, "quality_mix", 6.0)
    _spacer(doc)
    quality_rows = trends.get("quality_by_type", [])
    if quality_rows:
        rows = [
            [
                r.get("activity_type", ""),
                str(r.get("count", 0)),
                f"{r.get('avg_quality', 0):.1f}",
                f"{r.get('avg_input_depth', 0):.1f}",
                f"{r.get('meaningful_pct', 0):.1f}%",
                f"{r.get('high_value_pct', 0):.1f}%",
                f"{r.get('shallow_pct', 0):.1f}%",
                f"{r.get('follow_up_pct', 0):.1f}%",
            ]
            for r in quality_rows
        ]
        _add_table(
            doc,
            ["Type", "Count", "Avg Quality", "Avg Input Depth", "Meaningful", "High Value", "Shallow", "Follow-up"],
            rows,
            [0.8, 0.8, 0.9, 1.0, 0.9, 0.9, 0.9, 0.9],
        )
        _spacer(doc)

    # Input depth: only reported when a correlation value is present
    input_depth = trends.get("input_depth", {})
    if input_depth.get("correlation") is not None:
        _heading(doc, "4.8 Input Depth as a Supporting Quality Metric", 2)
        _para(doc,
              f"Across the two-year Safety Energy window, input depth and quality score are correlated at r = {input_depth.get('correlation'):.2f}. "
              f"{input_depth.get('note', '')}",
              size_pt=11, colour=NAVY)
        by_band = input_depth.get("by_band", [])
        if by_band:
            rows = [
                [
                    r.get("band", ""),
                    str(r.get("count", 0)),
                    f"{r.get('avg_input_depth', 0):.1f}",
                    f"{r.get('avg_quality', 0):.1f}",
                    f"{r.get('meaningful_pct', 0):.1f}%",
                    f"{r.get('high_value_pct', 0):.1f}%",
                    f"{r.get('shallow_pct', 0):.1f}%",
                ]
                for r in by_band
            ]
            _add_table(
                doc,
                ["Band", "Count", "Avg Input Depth", "Avg Quality", "Meaningful", "High Value", "Shallow"],
                rows,
                [0.9, 0.8, 1.0, 0.9, 0.9, 0.9, 0.9],
            )
            _spacer(doc)

    top_themes = trends.get("top_themes", {})
    if top_themes:
        _para(doc, "Most common recurring themes in Safety Energy narratives:", bold=True, size_pt=11, colour=NAVY)
        for theme, count in list(top_themes.items())[:5]:
            _bullet(doc, f"{theme}: {count} mentions")
        _spacer(doc)

    # Heading previously read "OCCC"; the activity type is "OCC" everywhere
    # else in the report, including the loop below.
    _heading(doc, "4.9 CCC / OCC / LLC Value Signals", 2)
    activity_insights = trends.get("activity_insights", {})
    for atype in ["CCC", "OCC", "LLC"]:
        insight = activity_insights.get(atype, {})
        if not insight:
            continue
        _para(
            doc,
            f"{atype}: average quality {insight.get('avg_quality', 0):.1f}/100, "
            f"average input depth {insight.get('avg_input_depth', 0):.1f}/100, "
            f"{insight.get('preventive_pct', 0):.1f}% preventive signal, "
            f"{insight.get('reactive_pct', 0):.1f}% reactive signal, "
            f"{insight.get('repetitive_pct', 0):.1f}% repetitive signal, "
            f"{insight.get('shallow_pct', 0):.1f}% shallow.",
            size_pt=11,
            colour=NAVY,
        )
        depth = insight.get("input_depth", {})
        if depth.get("correlation") is not None:
            _para(doc,
                  f"For {atype}, input depth vs quality correlation is r = {depth.get('correlation'):.2f}.",
                  size_pt=10, colour=GREY)
        top_modules = insight.get("top_modules", {})
        if top_modules:
            _para(doc, f"Top {atype} focus areas:", bold=True, size_pt=10, colour=GREY)
            for label, count in list(top_modules.items())[:4]:
                _bullet(doc, f"{label}: {count}")
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Effectiveness of Leading Activities
# ─────────────────────────────────────────────────────────────────────────────
def _section_effectiveness(doc: Document, results: AnalysisResults) -> None:
    """Write section 5: how leading activity lines up with event outcomes.

    Sub-sections 5.1–5.3 are always written. The business-unit table and the
    5.4 / 5.5 pattern lists appear only when ``results.effectiveness`` holds
    data for them. Ends with the effectiveness note and a page break.
    """
    _heading(doc, "5. Effectiveness of Leading Activities", 1)
    effectiveness = results.effectiveness
    charts = results.charts
    _para(doc,
          "This section examines whether leading activity patterns appear associated with "
          "event outcomes at a business unit and portfolio level. All findings are associative "
          "only — correlation does not imply causation.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # Monthly overlay chart
    _heading(doc, "5.1 Monthly Activities vs Events Overlay", 2)
    _add_chart(doc, charts, "overlay", 6.0)
    _spacer(doc)

    # Correlation note
    _heading(doc, "5.2 Statistical Association", 2)
    _para(doc, effectiveness.get("corr_note", "N/A"), size_pt=11, colour=NAVY)
    _spacer(doc)

    # BU comparison chart
    _heading(doc, "5.3 Activities vs Events by Business Unit", 2)
    _add_chart(doc, charts, "bu_comparison", 5.5)
    _spacer(doc)

    # BU table
    bu_records = effectiveness.get("bu_table", [])
    if bu_records:
        bu_rows = []
        for record in bu_records:
            bu_rows.append([
                record.get("business_unit", ""),
                str(record.get("activities", 0)),
                str(record.get("events", 0)),
            ])
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   bu_rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # Pattern commentary
    busy_and_eventful = effectiveness.get("high_activity_high_events", [])
    busy_and_quiet = effectiveness.get("high_activity_low_events", [])
    if busy_and_eventful:
        _heading(doc, "5.4 High Activity / High Events — Pattern of Interest", 2)
        _para(doc,
              "The following Business Units recorded both above-median leading-activity volumes "
              "and above-median event counts. This may indicate reactive activity patterns where "
              "engagement is increasing in response to events rather than preventing them. "
              "Further investigation is recommended.",
              size_pt=11, colour=NAVY)
        for unit in busy_and_eventful:
            _bullet(doc, unit)
        _spacer(doc)
    if busy_and_quiet:
        _heading(doc, "5.5 High Activity / Low Events — Positive Signal", 2)
        _para(doc,
              "The following Business Units recorded above-median leading-activity volumes "
              "and below-median event counts. This pattern is consistent with leading activities "
              "having a preventive effect, though this cannot be confirmed from available data.",
              size_pt=11, colour=NAVY)
        for unit in busy_and_quiet:
            _bullet(doc, unit)
        _spacer(doc)

    _para(doc, effectiveness.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 6. At-Risk Behaviours
# ─────────────────────────────────────────────────────────────────────────────
def _section_at_risk(doc: Document, results: AnalysisResults) -> None:
    """Write section 6: at-risk behaviour themes and LLC/event alignment.

    Uses ``results.at_risk``: ``combined_themes`` for the frequency table,
    ``llc_themes`` / ``event_themes`` for the side-by-side comparison, and
    ``gap_themes`` for the optional alignment-gap list. Ends with the
    at-risk note and a page break.
    """
    _heading(doc, "6. At-Risk Behaviours", 1)
    at_risk = results.at_risk
    charts = results.charts
    _para(doc,
          "At-risk behaviour themes are identified by analysing free-text fields across all "
          "three data sources (Events descriptions, LLC conversation notes, and Safety Energy "
          "observations) using keyword matching against known risk categories.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    _heading(doc, "6.1 Combined Theme Frequency", 2)
    _add_chart(doc, charts, "at_risk_themes", 5.5)
    _spacer(doc)
    combined = at_risk.get("combined_themes", {})
    if combined:
        combined_rows = [[theme, str(weight)] for theme, weight in combined.items()]
        _add_table(
            doc,
            ["Risk Theme", "Weighted Frequency"],
            combined_rows,
            [3.5, 2.0],
        )
        _spacer(doc)

    # LLC theme focus vs event themes
    _heading(doc, "6.2 LLC Conversation Topics vs Event Themes", 2)
    llc_counts = at_risk.get("llc_themes", {})
    event_counts = at_risk.get("event_themes", {})
    if llc_counts or event_counts:
        # Union of both theme sets, alphabetical; a missing side shows as 0.
        theme_names = sorted(set(llc_counts) | set(event_counts))
        comparison_rows = []
        for theme in theme_names:
            comparison_rows.append(
                [theme, str(llc_counts.get(theme, 0)), str(event_counts.get(theme, 0))]
            )
        _add_table(doc,
                   ["Risk Theme", "LLC Mentions", "Event Mentions"],
                   comparison_rows,
                   [3.0, 1.5, 1.5])
        _spacer(doc)

    # Alignment gap
    gap_themes = at_risk.get("gap_themes", [])
    if gap_themes:
        _heading(doc, "6.3 Topic Alignment Gaps", 2)
        _para(doc,
              "The following risk themes appear among the top event themes but are under-represented "
              "in LLC conversation topics. This may indicate a gap between where safety conversations "
              "are focused and where actual events are occurring.",
              size_pt=11, colour=NAVY)
        for theme in gap_themes:
            _bullet(doc, theme)
        _spacer(doc)

    _para(doc, at_risk.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 7. Safety Energy ↔ Events Relationship
# ─────────────────────────────────────────────────────────────────────────────
def _section_se_events(doc: Document, results: AnalysisResults) -> None:
    """Write section 7: leading-activity coverage compared with event burden.

    Uses ``results.se_events_rel``: ``bu_comparison`` for 7.1,
    ``project_comparison`` / ``location_comparison`` (each with ``best`` and
    ``watch`` record lists) for 7.2 / 7.3, ``spike_months`` for 7.4, and the
    alignment note plus top-topic lists for 7.5. Optional sub-sections are
    skipped when their data is empty. Ends with a page break.
    """

    def ratio_text(value) -> str:
        # A missing or None ratio renders as an empty cell.
        return "" if value is None else str(value)

    def signal_table(records, name_key: str, name_header: str) -> None:
        # Shared 5-column layout of the project and location signal tables.
        rows = [
            [str(r.get(name_key, "")), str(r.get("activities", 0)), str(r.get("events", 0)),
             str(r.get("serious_events", 0)), ratio_text(r.get("activity_event_ratio", ""))]
            for r in records
        ]
        _add_table(doc, [name_header, "Activities", "Events", "Serious", "Activity:Event"],
                   rows, [2.8, 1.0, 0.9, 0.9, 1.3])
        _spacer(doc)

    _heading(doc, "7. Relationship Between Safety Energy and Events", 1)
    rel = results.se_events_rel
    chts = results.charts
    _callout(
        doc,
        "Coverage vs Outcome",
        "This section links leading activity coverage to event burden so leaders can see which business units, projects, and locations appear strongest, and which need direct intervention.",
        fill="f7fbff",
        accent=TEAL,
    )
    _spacer(doc)

    # BU comparison table
    _heading(doc, "7.1 Business Unit Comparison", 2)
    bu_comp = rel.get("bu_comparison", [])
    if bu_comp:
        rows = [
            [str(r.get("business_unit", "")),
             str(int(r.get("activities", 0))),
             str(int(r.get("events", 0))),
             ratio_text(r.get("ratio", ""))]
            for r in bu_comp
        ]
        _add_table(doc,
                   ["Business Unit", "Activities", "Events", "Activity:Event Ratio"],
                   rows,
                   [2.5, 1.2, 1.0, 2.0])
        _spacer(doc)

    # Project signals: every "best" record, at most six "watch" records
    project_comp = rel.get("project_comparison", {})
    if project_comp.get("best") or project_comp.get("watch"):
        _heading(doc, "7.2 Project Performance Signals", 2)
        _para(doc,
              "These project comparisons use recorded Safety Energy activity against recorded events. They are intended as directional signals only, not league tables.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, chts, "project_quadrant", 5.8)
        _spacer(doc)
        if project_comp.get("best"):
            signal_table(project_comp.get("best", []), "project", "Project")
        if project_comp.get("watch"):
            _para(doc, "Projects needing leadership attention:", bold=True, size_pt=11, colour=NAVY)
            signal_table(project_comp.get("watch", [])[:6], "project", "Project")

    # Location signals: same layout and limits as the project signals
    location_comp = rel.get("location_comparison", {})
    if location_comp.get("best") or location_comp.get("watch"):
        _heading(doc, "7.3 Location Performance Signals", 2)
        if location_comp.get("best"):
            signal_table(location_comp.get("best", []), "location", "Location")
        if location_comp.get("watch"):
            _para(doc, "Locations needing leadership attention:", bold=True, size_pt=11, colour=NAVY)
            signal_table(location_comp.get("watch", [])[:6], "location", "Location")

    # Spike months
    spikes = rel.get("spike_months", [])
    if spikes:
        _heading(doc, "7.4 Event Spike Periods", 2)
        _para(doc,
              "The following periods recorded above-average event counts coinciding with "
              "below-average leading-activity volumes. These periods may warrant retrospective "
              "review.",
              size_pt=11, colour=NAVY)
        for m in spikes:
            _bullet(doc, m)
        _spacer(doc)

    # Topic alignment note
    _heading(doc, "7.5 Topic Alignment Observation", 2)
    _para(doc, rel.get("alignment_note", ""), size_pt=11, colour=NAVY)
    llc_top = rel.get("llc_top_topics", [])
    ev_top = rel.get("ev_top_rc", [])
    if llc_top:
        _para(doc, "Top LLC topics:", bold=True, size_pt=11, colour=NAVY)
        for t in llc_top:
            _bullet(doc, str(t))
    if ev_top:
        _para(doc, "Top event root causes:", bold=True, size_pt=11, colour=NAVY)
        for t in ev_top:
            _bullet(doc, str(t))
    _spacer(doc)
    _para(doc, rel.get("note", ""), size_pt=10, colour=GREY)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 8. Leader Focus Areas
# ─────────────────────────────────────────────────────────────────────────────
def _section_focus_areas(doc: Document, results: AnalysisResults) -> None:
    """Write section 8: business units and trends needing leadership focus.

    The 8.1 heading is always written; its table and sub-sections 8.2–8.4
    appear only when ``results.focus_areas`` / ``results.trends`` hold data
    for them. At most five watchouts and eight hotspot rows are listed.
    """
    _heading(doc, "8. Leader Focus Areas", 1)
    focus = results.focus_areas
    trends = results.trends
    charts = results.charts
    _para(doc,
          "This section identifies Business Units and leaders that warrant specific leadership "
          "attention based on activity volumes, event rates, and observed trends.",
          size_pt=11, colour=NAVY)
    _spacer(doc)

    # BU summary table
    _heading(doc, "8.1 Business Unit Activity and Event Summary", 2)
    bu_records = focus.get("bu_summary", [])
    if bu_records:
        summary_rows = []
        for record in bu_records:
            summary_rows.append([
                str(record.get("business_unit", "")),
                str(record.get("activities", 0)),
                str(record.get("events", 0)),
            ])
        _add_table(doc,
                   ["Business Unit", "Leading Activities", "Events"],
                   summary_rows,
                   [3.0, 2.0, 1.5])
        _spacer(doc)

    # Declining BUs
    declining_units = focus.get("declining_bus", [])
    if declining_units:
        _heading(doc, "8.2 Declining Activity Units", 2)
        _para(doc,
              "The following Business Units recorded significantly lower leading-activity "
              "volumes in the second half of the analysis period compared to the first half. "
              "Leaders in these units should be engaged to understand and address the decline.",
              size_pt=11, colour=NAVY)
        for unit in declining_units:
            _bullet(doc, unit)
        _spacer(doc)

    # Leadership watchouts
    watchouts = trends.get("leadership_focus", [])
    if watchouts:
        _heading(doc, "8.3 Leadership Watchouts from Two-Year Safety Energy Trends", 2)
        for watchout in watchouts[:5]:
            _bullet(doc, watchout)
        _spacer(doc)

    # High-volume / low-value hotspots
    hotspots = trends.get("high_volume_low_value", [])
    if hotspots:
        _heading(doc, "8.4 High-Volume / Low-Value Hotspots", 2)
        _para(doc,
              "These areas are recording substantial activity volume, but the record quality signals "
              "suggest the activity may be drifting toward compliance-only completion rather than strong learning.",
              size_pt=11, colour=NAVY)
        _add_chart(doc, charts, "low_value_units", 5.8)
        _spacer(doc)
        hotspot_rows = []
        for record in hotspots[:8]:
            hotspot_rows.append([
                record.get("activity_type", ""),
                str(record.get("business_unit", "")),
                str(record.get("count", 0)),
                f"{record.get('avg_quality', 0):.1f}",
                f"{record.get('shallow_pct', 0):.1f}%",
            ])
        _add_table(doc,
                   ["Type", "Business Unit", "Count", "Avg Quality", "Shallow"],
                   hotspot_rows,
                   [0.9, 2.8, 0.9, 1.0, 1.0])
        _spacer(doc)
    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 9. Recommended Actions
# ─────────────────────────────────────────────────────────────────────────────
def _section_recommendations(doc: Document, results: AnalysisResults) -> None:
    """
    Render section 9 ("Recommended Actions") into the document.

    Writes an introductory "Action Agenda" callout, then one callout per
    entry in ``results.recommendations`` (at most the first ten). The first
    three callouts are labelled "Immediate Priority" and use the ALERT
    accent; the rest are labelled "Next Priority" and use the NAVY accent.
    A fixed list of ongoing-monitoring bullets follows, and the section is
    closed with a page break.
    """
    _heading(doc, "9. Recommended Actions", 1)
    _callout(
        doc,
        "Action Agenda",
        "These actions are generated directly from the event hotspots, leading-activity quality signals, and project/location performance patterns in the report.",
        fill="eef6fb",
        accent=TEAL,
    )
    _spacer(doc)

    # Only the first ten recommendations are printed; ranks are 1-based.
    shortlisted = results.recommendations[:10]
    for rank, recommendation in enumerate(shortlisted, start=1):
        if rank <= 3:
            tier, fill_hex, accent_colour = "Immediate", "f7fbff", ALERT
        else:
            tier, fill_hex, accent_colour = "Next", "ffffff", NAVY
        _callout(
            doc,
            f"{tier} Priority",
            recommendation,
            fill=fill_hex,
            accent=accent_colour,
        )
        _spacer(doc)

    _spacer(doc)
    _heading(doc, "Ongoing Monitoring Recommendations", 2)

    # Static guidance that is printed regardless of the analysis results.
    monitoring_points = (
        "Track leading-activity volumes monthly by Business Unit against a set target (e.g. "
        "minimum 4 LLCs per leader per month).",
        "Track CCC/OCC/LLC quality monthly using shallow-entry rate, follow-up rate, and average quality score.",
        "Review LLC topic coverage quarterly to ensure alignment with top event root causes.",
        "Re-run this full report monthly or quarterly as new data becomes available.",
        "Supplement quantitative analysis with qualitative review of LLC content quality.",
        "Use the Business Unit activity-to-event ratio table to guide where SHEQ advisor "
        "engagement should be prioritised.",
    )
    for point in monitoring_points:
        _bullet(doc, point)

    _page_break(doc)
# ─────────────────────────────────────────────────────────────────────────────
# 10. Methodology and Caveats
# ─────────────────────────────────────────────────────────────────────────────
def _section_methodology(doc: Document, results: AnalysisResults) -> None:
    """
    Render section 10 ("Methodology and Caveats") into the document.

    Sub-sections written, in order:
      10.1  two-column table describing the three input workbooks
      10.2  bullet definitions of the LLC / CCC / OCC activity types
      10.3  bullet summary of the analytical approach; the rolling-window
            bullet is filled from ``results.trends`` (``window_months``,
            ``window_start``, ``window_end``) with fallbacks of 24 / 'N/A'
      10.4  one bullet per entry in ``results.caveats``

    A small grey "Report generated" timestamp paragraph is appended last.
    """
    _heading(doc, "10. Methodology and Caveats", 1)
    trends = results.trends

    # ── 10.1 Data sources ────────────────────────────────────────────────
    _heading(doc, "10.1 Data Sources", 2)
    source_columns = ["Source", "Description"]
    source_rows = [
        ["Events.xlsx",
         "Incident and event records exported from the Ventia safety management system. "
         "Covers all event types including injuries, motor vehicle events, close calls, "
         "environmental events, and nonconformances."],
        ["Safety_Energy.xlsx",
         "Combined leading activity export covering all three activity types: Leader Learning "
         "Conversations (LLC), Critical Control Checks (CCC), and Operational Control Checks (OCC). "
         "This is treated as the primary leading indicator data source."],
        ["LLC_Data.xlsx",
         "Supplementary LLC export providing richer free-text data (conversation topics, CRP focus, "
         "at-risk observations). Used primarily for theme and topic analysis. Record counts are "
         "closely aligned with the LLC records in Safety_Energy.xlsx."],
    ]
    source_widths = [2.0, 4.5]
    _add_table(doc, source_columns, source_rows, source_widths)
    _spacer(doc)

    # ── 10.2 Activity type definitions ───────────────────────────────────
    _heading(doc, "10.2 Activity Type Definitions", 2)
    _para(doc,
          "Safety Energy is the combined analytical domain. It encompasses three activity types:",
          size_pt=11, colour=NAVY)
    activity_definitions = [
        "LLC (Leader Learning Conversation): A structured conversation between a leader and "
        "a worker or work group, focused on safety topics, risk identification, and critical "
        "controls.",
        "CCC (Critical Control Check): A field verification that critical controls for high-risk "
        "activities are in place and effective (e.g. working at height, hazardous energies).",
        "OCC (Operational Control Check): A broader operational inspection or check covering "
        "a range of work-area risk topics.",
        "Note: In some legacy documentation or older exports, the label 'OCC' was used broadly "
        "to cover what is now split into CCC and OCC. The current Safety_Energy.xlsx export "
        "correctly separates these via the ModuleType field. No manual deduplication was required.",
    ]
    for definition in activity_definitions:
        _bullet(doc, definition)
    _spacer(doc)

    # ── 10.3 Analytical approach ─────────────────────────────────────────
    _heading(doc, "10.3 Analytical Approach", 2)
    approach_notes = [
        "Monthly trend analysis: Activities and events are aggregated by calendar month. "
        "Trend direction is estimated by comparing recent-period averages against prior-period averages.",
        f"Rolling two-year Safety Energy review: deeper trend and quality analysis uses a {trends.get('window_months', 24)}-month "
        f"window from {trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}, anchored to the latest Safety Energy record.",
        "Effectiveness analysis: Business unit-level aggregates and overall monthly correlations "
        "are used as proxies for effectiveness. Correlation is computed using Pearson r.",
        "At-risk theme extraction: Free-text fields are scanned using a predefined keyword "
        "dictionary (see config.py). Frequency counts are combined across sources with a "
        "2× weight applied to event-source mentions (lagging signal).",
        "Business Unit focus: BUs are flagged as 'declining' if second-half activity volume "
        "is less than 70% of first-half volume within the analysis window.",
        "Leading-activity quality scoring: records are scored using practical proxies including text richness, specificity, "
        "risk recognition, action/follow-up language, learning evidence, and penalties for generic or duplicated wording.",
    ]
    for note in approach_notes:
        _bullet(doc, note)
    _spacer(doc)

    # ── 10.4 Caveats supplied by the analysis results ────────────────────
    _heading(doc, "10.4 Caveats and Limitations", 2)
    for caveat in results.caveats:
        _bullet(doc, caveat)
    _spacer(doc)

    # Footer line recording when this document was produced (local clock).
    generated_at = datetime.now().strftime('%d %B %Y at %H:%M')
    _para(doc, f"Report generated: {generated_at}", size_pt=9, colour=GREY)
# ─────────────────────────────────────────────────────────────────────────────
# Main entry point
# ─────────────────────────────────────────────────────────────────────────────
def build_report(results: AnalysisResults, output_dir: str) -> str:
    """
    Build the full SHEQ DOCX report from an AnalysisResults object.

    Parameters
    ----------
    results    : output of analysis_engine.run_full_analysis
    output_dir : directory to write the .docx file into. It is created if
                 it does not exist; a relative path is resolved against the
                 current working directory.

    Returns
    -------
    Absolute path to the generated .docx file.
    """
    # Resolve up front so the returned path is absolute even when the caller
    # passes a relative directory, as the Returns contract above promises.
    output_dir = os.path.abspath(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    doc = Document()
    _bootstrap_styles(doc)
    log.info("Building DOCX report...")

    # Sections are appended in the order they appear in the final document.
    _title_page(doc, results)
    _section_executive_summary(doc, results)
    _section_data_quality(doc, results)
    _section_events(doc, results)
    _section_leading_overview(doc, results)
    _section_effectiveness(doc, results)
    _section_at_risk(doc, results)
    _section_se_events(doc, results)
    _section_focus_areas(doc, results)
    _section_recommendations(doc, results)
    _section_methodology(doc, results)

    # File name carries a minute-resolution timestamp from the local clock.
    output_path = os.path.join(
        output_dir,
        f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.docx",
    )
    doc.save(output_path)
    log.info("Report saved to %s", output_path)
    return output_path