"""
|
|
SHEQ Incident Analysis Engine
|
|
Generates charts and a DOCX report comparing two Project Director periods.
|
|
|
|
Usage:
|
|
from analysis import run_analysis
|
|
run_analysis("All_Events__5_.xlsx", "2024-01-01", "2025-04-01",
|
|
"Matthew Arthur", "Manga", output_dir="output")
|
|
"""
|
|
|
|
# Standard library
import os
from io import BytesIO

# Third-party: scientific stack
import numpy as np
import pandas as pd

import matplotlib
matplotlib.use("Agg")  # headless backend; must be selected before pyplot is imported
import matplotlib.pyplot as plt

# Third-party: python-docx
from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import qn, nsdecls
from docx.shared import Inches, Pt, Cm, RGBColor, Emu
|
|
|
|
|
|
# ── Brand Colours (see DESIGN.md) ──
# RGBColor values are consumed by python-docx (report text/tables);
# the *_HEX strings below are the same colours for matplotlib.
# Primary
DEEP_BLUE = RGBColor(0x0B, 0x32, 0x54)
SKY_BLUE = RGBColor(0x13, 0xB5, 0xEA)
# Secondary
DARK_GREEN = RGBColor(0x00, 0x6E, 0x47)
MID_GREEN = RGBColor(0x00, 0x99, 0x46)
LIGHT_GREEN = RGBColor(0x7B, 0xC1, 0x43)
PURPLE = RGBColor(0x96, 0x35, 0x8D)
# Functional
GREY = RGBColor(0x64, 0x74, 0x8B)

# Aliases used throughout (semantic names for the same brand colours)
NAVY = DEEP_BLUE
TEAL = SKY_BLUE
GREEN = DARK_GREEN

# Hex versions for matplotlib
DEEP_BLUE_HEX = "#0b3254"
SKY_BLUE_HEX = "#13b5ea"
DARK_GREEN_HEX = "#006e47"
MID_GREEN_HEX = "#009946"
LIGHT_GREEN_HEX = "#7bc143"
PURPLE_HEX = "#96358d"
# AMBER/RED are chart-only accents (not part of the docx brand palette above)
AMBER_HEX = "#d97706"
RED_HEX = "#dc2626"

# Chart palette sequence per DESIGN.md
CHART_PALETTE = [DEEP_BLUE_HEX, SKY_BLUE_HEX, DARK_GREEN_HEX, MID_GREEN_HEX,
                 LIGHT_GREEN_HEX, PURPLE_HEX, AMBER_HEX, RED_HEX]

# PD comparison colours
MA_HEX = DEEP_BLUE_HEX   # PD1 = Deep Blue
MG_HEX = SKY_BLUE_HEX    # PD2 = Sky Blue
|
|
|
|
|
|
# ═══════════════════════════════════════════════
|
|
# DATA LOADING & PREPARATION
|
|
# ═══════════════════════════════════════════════
|
|
|
|
def load_and_prepare(filepath, start_date, split_date):
    """Load the events workbook, filter by date range, and add derived columns.

    Parameters:
        filepath: Excel workbook with an "Event Date" column.
        start_date: events strictly before this date are dropped.
        split_date: PD transition date — events before it are tagged "pd1",
            events on/after it "pd2".

    Returns:
        A filtered copy of the sheet with added Year, Month, MonthName, DOW,
        YearMonth (monthly Period) and PD columns.
    """
    df = pd.read_excel(filepath)
    df["Event Date"] = pd.to_datetime(df["Event Date"])
    df = df[df["Event Date"] >= pd.Timestamp(start_date)].copy()
    df["Year"] = df["Event Date"].dt.year
    df["Month"] = df["Event Date"].dt.month
    df["MonthName"] = df["Event Date"].dt.strftime("%b")
    df["DOW"] = df["Event Date"].dt.day_name()
    df["YearMonth"] = df["Event Date"].dt.to_period("M")
    # Vectorised comparison instead of a per-row .apply lambda: the original
    # rebuilt pd.Timestamp(split_date) once per row; this builds it once and
    # assigns the whole column in a single pass with identical results.
    df["PD"] = np.where(df["Event Date"] < pd.Timestamp(split_date), "pd1", "pd2")
    return df
|
|
|
|
|
|
def get_body_parts(series):
    """Explode comma-separated body-part entries into one value per part.

    NaN entries are skipped, each fragment is whitespace-trimmed, and any
    fragment that is empty or contains "unspecified" (case-insensitive)
    is discarded. Returns a flat pandas Series of the remaining parts.
    """
    cleaned = []
    for raw in series.dropna():
        fragments = (piece.strip() for piece in str(raw).split(","))
        cleaned.extend(
            fragment for fragment in fragments
            if fragment and "unspecified" not in fragment.lower()
        )
    return pd.Series(cleaned)
|
|
|
|
|
|
# ═══════════════════════════════════════════════
|
|
# CHART GENERATION
|
|
# ═══════════════════════════════════════════════
|
|
|
|
def _save(fig, path):
    """Tighten layout, export *fig* to *path* as a 200-dpi PNG, then free it."""
    fig.tight_layout()
    export_opts = {"dpi": 200, "bbox_inches": "tight", "facecolor": "white"}
    fig.savefig(path, **export_opts)
    # Close explicitly so repeated chart generation does not accumulate figures.
    plt.close(fig)
|
|
|
|
|
|
def _setup_chart_style():
    """Point matplotlib at Source Sans Pro, or the closest installed fallback."""
    import matplotlib.font_manager as fm
    installed = {font.name for font in fm.fontManager.ttflist}
    # Try brand fonts in preference order; fall back to the generic family.
    for preferred in ("Source Sans Pro", "Source Sans 3"):
        if preferred in installed:
            plt.rcParams["font.family"] = preferred
            break
    else:
        plt.rcParams["font.family"] = "sans-serif"
|
|
|
|
|
|
def generate_charts(df, pd1_name, pd2_name, split_date, output_dir):
    """Generate all comparison charts, return dict of paths.

    Writes up to seven PNGs into *output_dir* and returns a mapping of
    chart key -> saved file path, consumed by generate_docx(). The
    body-parts chart is only produced when usable body-part data exists.

    Parameters:
        df: prepared DataFrame from load_and_prepare() — relies on its
            added "PD", "YearMonth" and "DOW" columns.
        pd1_name / pd2_name: legend labels for the two PD periods.
        split_date: PD transition date (marks chart 1 with a dashed line).
        output_dir: existing directory the PNGs are written into.
    """
    _setup_chart_style()
    charts = {}
    pd1 = df[df["PD"] == "pd1"]
    pd2 = df[df["PD"] == "pd2"]

    # Consequence severity colours per DESIGN.md (Negligible..Major order)
    CONS_COLORS = [DARK_GREEN_HEX, AMBER_HEX, RED_HEX, PURPLE_HEX]

    # 1. Monthly trend by PD — stacked bars over the full month range
    # (reindex fills gaps with 0 so empty months still appear on the axis).
    fig, ax = plt.subplots(figsize=(10, 4))
    start_period = df["Event Date"].min().to_period("M")
    end_period = df["Event Date"].max().to_period("M")
    months_all = pd.period_range(start_period, end_period, freq="M")
    monthly = df.groupby(["YearMonth", "PD"]).size().unstack(fill_value=0).reindex(months_all, fill_value=0)
    x = range(len(months_all))
    labels = [m.strftime("%b %y") for m in months_all]
    # .get with a zero Series guards against a period with no events at all.
    ma_vals = monthly.get("pd1", pd.Series(0, index=months_all)).values
    mg_vals = monthly.get("pd2", pd.Series(0, index=months_all)).values
    ax.bar(x, ma_vals, color=MA_HEX, label=pd1_name, width=0.7, alpha=0.9)
    ax.bar(x, mg_vals, bottom=ma_vals, color=MG_HEX, label=pd2_name, width=0.7, alpha=0.9)
    # Mark the PD transition month with a dashed red line + label.
    split_m = pd.Timestamp(split_date).to_period("M")
    if split_m in months_all:
        trans_idx = list(months_all).index(split_m)
        ax.axvline(x=trans_idx - 0.5, color=RED_HEX, linestyle="--", linewidth=1.5, alpha=0.7)
        # Label sits near the top of the tallest stacked bar (min height 1).
        ax.text(trans_idx - 0.3, max(max(ma_vals + mg_vals), 1) * 0.95, "PD Transition",
                fontsize=9, color=RED_HEX, ha="left")
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
    ax.set_title("Monthly Events by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
    ax.set_ylabel("Events")
    ax.legend(loc="upper right")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    p = os.path.join(output_dir, "monthly_by_pd.png")
    _save(fig, p)
    charts["monthly_by_pd"] = p

    # 2. Event type comparison — horizontal grouped bars, top 8 types overall
    evt_types = df["Event Type"].value_counts().index[:8]
    ma_evt = pd1["Event Type"].value_counts().reindex(evt_types, fill_value=0)
    mg_evt = pd2["Event Type"].value_counts().reindex(evt_types, fill_value=0)
    fig, ax = plt.subplots(figsize=(9, 5))
    y = np.arange(len(evt_types))
    h = 0.35  # bar half-offset/height for the grouped pairs
    ax.barh(y - h / 2, ma_evt.values, h, label=pd1_name, color=MA_HEX)
    ax.barh(y + h / 2, mg_evt.values, h, label=pd2_name, color=MG_HEX)
    ax.set_yticks(y)
    ax.set_yticklabels(evt_types, fontsize=10)
    ax.invert_yaxis()
    ax.set_title("Event Types by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
    ax.legend()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    # Annotate each bar with its count, in the matching PD colour.
    for i, (v1, v2) in enumerate(zip(ma_evt.values, mg_evt.values)):
        ax.text(v1 + 0.2, i - h / 2, str(v1), va="center", fontsize=9, color=MA_HEX)
        ax.text(v2 + 0.2, i + h / 2, str(v2), va="center", fontsize=9, color=MG_HEX)
    p = os.path.join(output_dir, "event_type_by_pd.png")
    _save(fig, p)
    charts["event_type_by_pd"] = p

    # 3. Consequence comparison (side-by-side pie charts, one per PD)
    cons_order = ["Negligible", "Minor", "Moderate", "Major"]
    fig, axes = plt.subplots(1, 2, figsize=(9, 3.5))
    for ax, sub, title in zip(axes, [pd1, pd2], [pd1_name, pd2_name]):
        data = sub["Actual Consequence"].value_counts().reindex(cons_order, fill_value=0)
        ax.pie(data.values, labels=cons_order, autopct="%1.0f%%", colors=CONS_COLORS, startangle=140,
               textprops={"fontsize": 9})
        ax.set_title(title, fontsize=13, fontweight="bold", color=MA_HEX)
    p = os.path.join(output_dir, "consequence_by_pd.png")
    _save(fig, p)
    charts["consequence_by_pd"] = p

    # 4. Day of week — grouped vertical bars with count labels
    dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    fig, ax = plt.subplots(figsize=(9, 4))
    x_arr = np.arange(len(dow_order))
    w = 0.35
    ma_d = pd1["DOW"].value_counts().reindex(dow_order, fill_value=0)
    mg_d = pd2["DOW"].value_counts().reindex(dow_order, fill_value=0)
    b1 = ax.bar(x_arr - w / 2, ma_d.values, w, label=pd1_name, color=MA_HEX)
    b2 = ax.bar(x_arr + w / 2, mg_d.values, w, label=pd2_name, color=MG_HEX)
    ax.set_xticks(x_arr)
    ax.set_xticklabels([d[:3] for d in dow_order])
    ax.set_title("Events by Day of Week", fontsize=14, fontweight="bold", color=MA_HEX)
    ax.legend()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    # Label only non-zero bars to avoid a row of "0"s along the axis.
    for b in b1:
        if b.get_height() > 0:
            ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())),
                    ha="center", fontsize=9)
    for b in b2:
        if b.get_height() > 0:
            ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())),
                    ha="center", fontsize=9)
    p = os.path.join(output_dir, "dow_by_pd.png")
    _save(fig, p)
    charts["dow_by_pd"] = p

    # 5. Root cause — fixed category list (the "6 Ps" style taxonomy)
    rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"]
    fig, ax = plt.subplots(figsize=(9, 4))
    y = np.arange(len(rc_cats))
    h = 0.35
    ma_rc = pd1["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0)
    mg_rc = pd2["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0)
    ax.barh(y - h / 2, ma_rc.values, h, label=pd1_name, color=MA_HEX)
    ax.barh(y + h / 2, mg_rc.values, h, label=pd2_name, color=MG_HEX)
    ax.set_yticks(y)
    ax.set_yticklabels(rc_cats, fontsize=10)
    ax.invert_yaxis()
    ax.set_title("Root Cause Categories by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
    ax.legend()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    p = os.path.join(output_dir, "rootcause_by_pd.png")
    _save(fig, p)
    charts["rootcause_by_pd"] = p

    # 6. CRP comparison — top 8 active protocols, excluding the non-answers
    crp_all = df["CRPInvolved"].value_counts()
    crp_active = crp_all[~crp_all.index.isin(["None Identified", "Under Investigation"])].head(8)
    crp_cats = crp_active.index
    fig, ax = plt.subplots(figsize=(9, 4.5))
    y = np.arange(len(crp_cats))
    # NOTE(review): `h` here reuses the 0.35 set in chart 5 rather than
    # defining its own — works, but confirm this coupling is intentional.
    ma_c = pd1["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0)
    mg_c = pd2["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0)
    ax.barh(y - h / 2, ma_c.values, h, label=pd1_name, color=MA_HEX)
    ax.barh(y + h / 2, mg_c.values, h, label=pd2_name, color=MG_HEX)
    ax.set_yticks(y)
    ax.set_yticklabels(crp_cats, fontsize=9)
    ax.invert_yaxis()
    ax.set_title("Critical Risk Protocols by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
    ax.legend()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    p = os.path.join(output_dir, "crp_by_pd.png")
    _save(fig, p)
    charts["crp_by_pd"] = p

    # 7. Body parts — single-series top-10, skipped entirely when no data
    bp_series = get_body_parts(df["Bodily Location"])
    if len(bp_series) > 0:
        bp_top = bp_series.value_counts().head(10)
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.barh(range(len(bp_top)), bp_top.values, color=DARK_GREEN_HEX)
        ax.set_yticks(range(len(bp_top)))
        ax.set_yticklabels(bp_top.index, fontsize=10)
        ax.invert_yaxis()
        for i, v in enumerate(bp_top.values):
            ax.text(v + 0.1, i, str(v), va="center", fontsize=11, fontweight="bold")
        ax.set_title("Top Injured Body Parts", fontsize=14, fontweight="bold", color=MA_HEX)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        p = os.path.join(output_dir, "body_parts.png")
        _save(fig, p)
        charts["body_parts"] = p

    return charts
|
|
|
|
|
|
# ═══════════════════════════════════════════════
|
|
# DOCX GENERATION
|
|
# ═══════════════════════════════════════════════
|
|
|
|
def _set_cell_shading(cell, color_hex):
    """Fill a python-docx table cell background with *color_hex* (no '#')."""
    # python-docx has no shading API, so inject the w:shd element directly.
    shd_xml = '<w:shd {} w:fill="{}" w:val="clear"/>'.format(nsdecls("w"), color_hex)
    cell._tc.get_or_add_tcPr().append(parse_xml(shd_xml))
|
|
|
|
|
|
def _add_styled_table(doc, headers, rows, col_widths_inches):
    """Insert a branded comparison table: navy header row, zebra-striped body.

    *rows* is a list of row value lists (stringified on insertion);
    *col_widths_inches* gives one width per column. Returns the table.
    """

    def _write_cell(cell, content, shade, header=False):
        # Replace the cell's default empty paragraph with a single styled run.
        cell.text = ""
        run = cell.paragraphs[0].add_run(content)
        if header:
            run.bold = True
            run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        run.font.size = Pt(9)
        run.font.name = "Source Sans Pro"
        _set_cell_shading(cell, shade)

    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    table.style = "Table Grid"

    # Header row: white bold text on deep blue.
    for col, heading in enumerate(headers):
        _write_cell(table.rows[0].cells[col], heading, "0b3254", header=True)

    # Data rows: alternate pale blue / white shading.
    for r, row_values in enumerate(rows):
        stripe = "F0F5FA" if r % 2 == 0 else "FFFFFF"
        for c, value in enumerate(row_values):
            _write_cell(table.rows[r + 1].cells[c], str(value), stripe)

    # Apply the requested width to every cell in each column.
    for col, width in enumerate(col_widths_inches):
        for row in table.rows:
            row.cells[col].width = Inches(width)

    return table
|
|
|
|
|
|
def generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir):
    """Generate the full DOCX report.

    Builds a title page plus nine numbered sections (executive summary,
    monthly trends, event types, injuries, consequences, CRP/root cause,
    timing, investigation performance, recommendations), saves it as
    "SHEQ_PD_Comparison.docx" in *output_dir*, and returns that path.

    *charts* is the key -> PNG path mapping from generate_charts();
    missing keys are silently skipped by the add_chart helper.
    """
    doc = Document()

    # Set default font for body text
    style = doc.styles["Normal"]
    style.font.name = "Source Sans Pro"
    style.font.size = Pt(11)

    # Heading styles: H1 navy 16pt, H2 teal 13pt, both bold
    for level, size, color in [(1, 16, NAVY), (2, 13, TEAL)]:
        hs = doc.styles[f"Heading {level}"]
        hs.font.name = "Source Sans Pro"
        hs.font.size = Pt(size)
        hs.font.color.rgb = color
        hs.font.bold = True

    pd1 = df[df["PD"] == "pd1"]
    pd2 = df[df["PD"] == "pd2"]
    total = len(df)
    # Tenure length in average months (30.44 days); floor of 1 avoids
    # divide-by-zero in the per-month rates below.
    # NOTE(review): pd2_months adds "+ 1" but pd1_months does not — looks
    # like a month-to-date adjustment; confirm the asymmetry is intended.
    pd1_months = max(1, (pd.Timestamp(split_date) - df["Event Date"].min()).days / 30.44)
    pd2_months = max(1, (df["Event Date"].max() - pd.Timestamp(split_date)).days / 30.44 + 1)

    # Display ranges per period; "N/A" when a period has no events.
    pd1_start = pd1["Event Date"].min().strftime("%b %Y") if len(pd1) > 0 else "N/A"
    pd1_end = pd1["Event Date"].max().strftime("%b %Y") if len(pd1) > 0 else "N/A"
    pd2_start = pd2["Event Date"].min().strftime("%b %Y") if len(pd2) > 0 else "N/A"
    pd2_end = pd2["Event Date"].max().strftime("%b %Y") if len(pd2) > 0 else "N/A"

    # ── Title page ──
    doc.add_paragraph("")
    doc.add_paragraph("")
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run("SHEQ Incident Analysis")
    run.font.size = Pt(28)
    run.bold = True
    run.font.name = "Source Sans Pro"
    run.font.color.rgb = NAVY

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run("Far North Waters Project")
    run.font.size = Pt(16)
    run.font.name = "Source Sans Pro"
    run.font.color.rgb = TEAL

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(f"{pd1_start} \u2013 {pd2_end} (MTD)")
    run.font.size = Pt(14)
    run.font.name = "Source Sans Pro"
    run.font.color.rgb = TEAL

    doc.add_paragraph("")
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run("Performance by Project Director")
    run.font.size = Pt(13)
    run.bold = True
    run.font.name = "Source Sans Pro"
    run.font.color.rgb = NAVY

    # Subtitle line: each PD's name in their chart colour, ranges in grey.
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(f"{pd1_name} ")
    run.bold = True
    run.font.color.rgb = NAVY
    run = p.add_run(f"({pd1_start} \u2013 {pd1_end}) | ")
    run.font.color.rgb = GREY
    run = p.add_run(f"{pd2_name} ")
    run.bold = True
    run.font.color.rgb = TEAL
    run = p.add_run(f"({pd2_start} \u2013 {pd2_end})")
    run.font.color.rgb = GREY

    doc.add_paragraph("")
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run("Ventia \u2022 Infrastructure Services \u2022 Water & Environmental Services")
    run.font.size = Pt(10)
    run.font.color.rgb = GREY

    doc.add_page_break()

    # ── Helper functions (closures over `doc`/`charts`) ──
    def h1(text):
        # Level-1 section heading.
        doc.add_heading(text, level=1)

    def h2(text):
        # Level-2 sub-heading.
        doc.add_heading(text, level=2)

    def text(t, bold=False):
        # Plain body paragraph; returns it for optional further styling.
        p = doc.add_paragraph()
        run = p.add_run(t)
        run.bold = bold
        return p

    def bullet(t):
        # Bulleted list item.
        p = doc.add_paragraph(t, style="List Bullet")
        return p

    def add_chart(name, width=5.5):
        # Insert a chart PNG by key; silently no-op when the chart was skipped.
        if name in charts:
            doc.add_picture(charts[name], width=Inches(width))

    # Helper for injury classification
    def _inj_class(sub):
        return sub["Ventia Injury Classification"].value_counts()

    # ═══════════════════════════════════════════
    # 1. EXECUTIVE SUMMARY
    # ═══════════════════════════════════════════
    h1("1. Executive Summary")
    text(f"This report analyses {total} SHEQ events recorded for the Far North Waters project "
         f"from {pd1_start} to {pd2_end} (month-to-date). The analysis is structured around "
         f"two Project Director tenures to enable performance comparison:")

    # Per-period slices reused by the summary table and later sections.
    pd1_inj = pd1[pd1["Event Type"] == "Injury/Illness Sustained"]
    pd2_inj = pd2[pd2["Event Type"] == "Injury/Illness Sustained"]
    pd1_mv = pd1[pd1["Event Type"] == "Motor Vehicle"]
    pd2_mv = pd2[pd2["Event Type"] == "Motor Vehicle"]
    pd1_ic = _inj_class(pd1)
    pd2_ic = _inj_class(pd2)
    pd1_cc = len(pd1[pd1["Event Type"] == "Close Call"])
    pd2_cc = len(pd2[pd2["Event Type"] == "Close Call"])
    pd1_mod = len(pd1[pd1["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])])
    pd2_mod = len(pd2[pd2["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])])

    # max(len(...), 1) guards every percentage against an empty period.
    _add_styled_table(doc,
        ["", pd1_name, pd2_name],
        [
            ["Period", f"{pd1_start} \u2013 {pd1_end}", f"{pd2_start} \u2013 {pd2_end}"],
            ["Duration", f"{pd1_months:.0f} months", f"{pd2_months:.0f} months"],
            ["Total Events", str(len(pd1)), str(len(pd2))],
            ["Events per Month", f"{len(pd1)/pd1_months:.1f}", f"{len(pd2)/pd2_months:.1f}"],
            ["Injuries", f"{len(pd1_inj)} ({len(pd1_inj)/max(len(pd1),1)*100:.1f}%)",
             f"{len(pd2_inj)} ({len(pd2_inj)/max(len(pd2),1)*100:.1f}%)"],
            ["Motor Vehicle Events", f"{len(pd1_mv)} ({len(pd1_mv)/max(len(pd1),1)*100:.1f}%)",
             f"{len(pd2_mv)} ({len(pd2_mv)/max(len(pd2),1)*100:.1f}%)"],
            ["Lost Time Injuries", str(pd1_ic.get("Lost Time Injury", 0)), str(pd2_ic.get("Lost Time Injury", 0))],
            ["First Aid Treatments", str(pd1_ic.get("First Aid Treatment", 0)), str(pd2_ic.get("First Aid Treatment", 0))],
            ["Close Calls", f"{pd1_cc} ({pd1_cc/max(len(pd1),1)*100:.1f}%)",
             f"{pd2_cc} ({pd2_cc/max(len(pd2),1)*100:.1f}%)"],
            ["Moderate+ Consequence", f"{pd1_mod} ({pd1_mod/max(len(pd1),1)*100:.1f}%)",
             f"{pd2_mod} ({pd2_mod/max(len(pd2),1)*100:.1f}%)"],
            ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}",
             f"{pd2['Days to Investigate'].dropna().median():.0f}"],
            ["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}",
             f"{pd2['Days to Close'].dropna().median():.0f}"],
        ],
        [2.0, 2.2, 2.3]
    )

    doc.add_paragraph("")
    h2("Key Comparative Findings")

    rate1 = len(pd1) / pd1_months
    rate2 = len(pd2) / pd2_months
    bullet(f"Event rate {'increased' if rate2 > rate1 else 'decreased'} under {pd2_name} "
           f"({rate2:.1f}/month vs {rate1:.1f}/month), with Moderate+ consequences at "
           f"{pd2_mod/max(len(pd2),1)*100:.1f}% vs {pd1_mod/max(len(pd1),1)*100:.1f}%.")
    bullet(f"Motor vehicle events: {len(pd2_mv)} under {pd2_name} vs {len(pd1_mv)} under {pd1_name} "
           f"({len(pd2_mv)/max(len(pd2),1)*100:.1f}% vs {len(pd1_mv)/max(len(pd1),1)*100:.1f}%).")
    bullet(f"Close call reporting: {pd2_cc/max(len(pd2),1)*100:.1f}% under {pd2_name} vs "
           f"{pd1_cc/max(len(pd1),1)*100:.1f}% under {pd1_name}.")

    # lti1/lti2 are also reused by section 9.1 below.
    lti1 = pd1_ic.get("Lost Time Injury", 0)
    lti2 = pd2_ic.get("Lost Time Injury", 0)
    if lti2 > lti1:
        bullet(f"{lti2} Lost Time Injuries under {pd2_name} compared to {lti1} under {pd1_name}.")

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 2. MONTHLY TRENDS
    # ═══════════════════════════════════════════
    h1("2. Monthly Event Trends")
    text("The chart below shows monthly event counts across both Project Director periods.")
    add_chart("monthly_by_pd", 5.8)
    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 3. EVENT TYPE COMPARISON
    # ═══════════════════════════════════════════
    h1("3. Event Type Comparison")
    add_chart("event_type_by_pd", 5.5)

    evt_types = df["Event Type"].value_counts().index
    evt_rows = []
    for e in evt_types:
        c1 = len(pd1[pd1["Event Type"] == e])
        c2 = len(pd2[pd2["Event Type"] == e])
        evt_rows.append([e, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%",
                         str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"])
    _add_styled_table(doc, ["Event Type", pd1_name, "%", pd2_name, "%"], evt_rows,
                      [2.0, 1.1, 0.8, 1.0, 0.8])

    doc.add_paragraph("")
    text("Notable shifts:", bold=True)
    # Auto-detect biggest shifts: any event type whose share of the period's
    # events moved by more than 5 percentage points between PDs.
    for e in evt_types:
        c1 = len(pd1[pd1["Event Type"] == e])
        c2 = len(pd2[pd2["Event Type"] == e])
        pct1 = c1 / max(len(pd1), 1) * 100
        pct2 = c2 / max(len(pd2), 1) * 100
        if abs(pct2 - pct1) > 5:
            direction = "increased" if pct2 > pct1 else "decreased"
            bullet(f"{e} {direction}: {pct1:.1f}% \u2192 {pct2:.1f}% ({c1} \u2192 {c2} events).")

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 4. INJURY ANALYSIS
    # ═══════════════════════════════════════════
    h1("4. Injury Analysis")
    h2("4.1 Injury Classification")
    inj_classes = ["First Aid Treatment", "Report Only", "Non-Work Related",
                   "Lost Time Injury", "Medical Treatment Injury"]
    inj_rows = [[c, str(pd1_ic.get(c, 0)), str(pd2_ic.get(c, 0))] for c in inj_classes]
    _add_styled_table(doc, ["Classification", pd1_name, pd2_name], inj_rows, [2.5, 1.8, 1.8])

    h2("4.2 Body Parts Injured")
    add_chart("body_parts", 5.0)

    # Body part comparison: union of each period's top 6, capped at 8 rows.
    bp1 = get_body_parts(pd1["Bodily Location"]).value_counts().head(6)
    bp2 = get_body_parts(pd2["Bodily Location"]).value_counts().head(6)
    all_bp = list(dict.fromkeys(list(bp1.index) + list(bp2.index)))[:8]
    bp_rows = [[bp, str(bp1.get(bp, 0)), str(bp2.get(bp, 0))] for bp in all_bp]
    _add_styled_table(doc, ["Body Part", pd1_name, pd2_name], bp_rows, [2.5, 1.8, 1.8])

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 5. CONSEQUENCE ANALYSIS
    # ═══════════════════════════════════════════
    h1("5. Consequence Analysis")
    add_chart("consequence_by_pd", 5.5)

    cons_order = ["Negligible", "Minor", "Moderate", "Major"]
    cons_rows = []
    for c in cons_order:
        c1 = len(pd1[pd1["Actual Consequence"] == c])
        c2 = len(pd2[pd2["Actual Consequence"] == c])
        cons_rows.append([c, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%",
                          str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"])
    _add_styled_table(doc, ["Consequence", pd1_name, "%", pd2_name, "%"], cons_rows,
                      [1.5, 1.0, 0.8, 1.0, 0.8])

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 6. CRP & ROOT CAUSE
    # ═══════════════════════════════════════════
    h1("6. Critical Risk Protocols & Root Causes")
    h2("6.1 CRP Comparison")
    add_chart("crp_by_pd", 5.5)

    h2("6.2 Root Cause Comparison")
    add_chart("rootcause_by_pd", 5.5)

    rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"]
    rc_rows = []
    for r in rc_cats:
        c1 = len(pd1[pd1["Root Cause Category"] == r])
        c2 = len(pd2[pd2["Root Cause Category"] == r])
        # Percentages here are of events WITH a root cause recorded,
        # not of all events (unlike the tables above).
        t1 = pd1["Root Cause Category"].notna().sum()
        t2 = pd2["Root Cause Category"].notna().sum()
        rc_rows.append([r, str(c1), f"{c1/max(t1,1)*100:.1f}%",
                        str(c2), f"{c2/max(t2,1)*100:.1f}%"])
    _add_styled_table(doc, ["Root Cause", pd1_name, "%", pd2_name, "%"], rc_rows,
                      [2.0, 1.1, 0.8, 1.0, 0.8])

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 7. TIMING PATTERNS
    # ═══════════════════════════════════════════
    h1("7. Timing Patterns")
    add_chart("dow_by_pd", 5.5)

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 8. INVESTIGATION PERFORMANCE
    # ═══════════════════════════════════════════
    h1("8. Investigation Performance")
    inv_rows = [
        ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}",
         f"{pd2['Days to Investigate'].dropna().median():.0f}"],
        ["Mean Days to Investigate", f"{pd1['Days to Investigate'].dropna().mean():.1f}",
         f"{pd2['Days to Investigate'].dropna().mean():.1f}"],
        ["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}",
         f"{pd2['Days to Close'].dropna().median():.0f}"],
        ["Mean Days to Close", f"{pd1['Days to Close'].dropna().mean():.1f}",
         f"{pd2['Days to Close'].dropna().mean():.1f}"],
        ["Events Closed", f"{(pd1['Status']=='Closed').sum()} ({(pd1['Status']=='Closed').sum()/max(len(pd1),1)*100:.0f}%)",
         f"{(pd2['Status']=='Closed').sum()} ({(pd2['Status']=='Closed').sum()/max(len(pd2),1)*100:.0f}%)"],
        ["Events Open", str((pd1["Status"] == "Open").sum()), str((pd2["Status"] == "Open").sum())],
    ]
    _add_styled_table(doc, ["Metric", pd1_name, pd2_name], inv_rows, [2.5, 1.8, 1.8])

    doc.add_page_break()

    # ═══════════════════════════════════════════
    # 9. RECOMMENDATIONS
    # ═══════════════════════════════════════════
    h1("9. Key Findings & Recommendations")

    # 9.1 bullets are data-driven: each only appears when the comparison holds.
    h2(f"9.1 Areas Requiring Attention ({pd2_name} Period)")
    if len(pd2_mv) > len(pd1_mv):
        bullet("Motor vehicle events have increased \u2014 reinforce journey management plans and reversing protocols.")
    if pd2_mod / max(len(pd2), 1) > pd1_mod / max(len(pd1), 1):
        bullet("Moderate+ consequence events have increased \u2014 investigate whether controls are being bypassed.")
    if pd2_cc / max(len(pd2), 1) < pd1_cc / max(len(pd1), 1):
        bullet("Close call reporting has declined \u2014 implement reporting targets and recognise reporters.")
    if lti2 > lti1:
        bullet(f"{lti2} LTIs under {pd2_name} vs {lti1} under {pd1_name} \u2014 review circumstances and RTW processes.")

    # NOTE(review): 9.2/9.3 bullets are fixed text, not derived from `df` —
    # they will print even if the underlying pattern is absent; confirm.
    h2("9.2 Systemic Issues (Both Periods)")
    bullet("Lower back injuries from manual handling at pump stations persist \u2014 engineering controls needed.")
    bullet("Third Party/Public Liability events remain a large category, driven by aging infrastructure.")
    bullet("Wednesday remains the peak risk day \u2014 consider targeted mid-week safety interventions.")

    h2("9.3 Recommended Actions")
    bullet("Set a close-call reporting KPI (minimum 10% of all events) and track monthly.")
    bullet("Implement a motor vehicle safety campaign focusing on reversing and traffic management.")
    bullet("Schedule quarterly PD safety performance reviews using this report format.")

    # ── Save ──
    output_path = os.path.join(output_dir, "SHEQ_PD_Comparison.docx")
    doc.save(output_path)
    return output_path
|
|
|
|
|
|
# ═══════════════════════════════════════════════
|
|
# MAIN ENTRY POINT
|
|
# ═══════════════════════════════════════════════
|
|
|
|
def run_analysis(filepath, start_date, split_date, pd1_name, pd2_name, output_dir="output"):
    """Execute the complete pipeline: load data, draw charts, build the report.

    Creates *output_dir* if needed, prints progress to stdout, and returns
    the path of the generated DOCX file.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"Loading data from {filepath}...")
    events = load_and_prepare(filepath, start_date, split_date)
    first_day = events["Event Date"].min().date()
    last_day = events["Event Date"].max().date()
    print(f"  {len(events)} events loaded ({first_day} to {last_day})")
    print(f"  {pd1_name}: {(events['PD']=='pd1').sum()} events")
    print(f"  {pd2_name}: {(events['PD']=='pd2').sum()} events")

    print("Generating charts...")
    chart_paths = generate_charts(events, pd1_name, pd2_name, split_date, output_dir)
    print(f"  {len(chart_paths)} charts created")

    print("Generating DOCX report...")
    report_path = generate_docx(events, pd1_name, pd2_name, split_date, chart_paths, output_dir)
    print(f"  Report saved to {report_path}")

    return report_path
|
|
|
|
|
|
if __name__ == "__main__":
|
|
run_analysis(
|
|
filepath="All_Events__5_.xlsx",
|
|
start_date="2024-01-01",
|
|
split_date="2025-04-01",
|
|
pd1_name="Matthew Arthur",
|
|
pd2_name="Manga",
|
|
output_dir="output"
|
|
)
|