"""
|
||||
SHEQ Incident Analysis Engine
|
||||
Generates charts and a DOCX report comparing two Project Director periods.
|
||||
|
||||
Usage:
|
||||
from analysis import run_analysis
|
||||
run_analysis("All_Events__5_.xlsx", "2024-01-01", "2025-04-01",
|
||||
"Matthew Arthur", "Manga", output_dir="output")
|
||||
"""
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from docx import Document
|
||||
from docx.shared import Inches, Pt, Cm, RGBColor, Emu
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
from docx.enum.table import WD_TABLE_ALIGNMENT
|
||||
from docx.oxml.ns import qn, nsdecls
|
||||
from docx.oxml import parse_xml
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
# ── Brand Colours (see DESIGN.md) ──
|
||||
# Primary
|
||||
DEEP_BLUE = RGBColor(0x0B, 0x32, 0x54)
|
||||
SKY_BLUE = RGBColor(0x13, 0xB5, 0xEA)
|
||||
# Secondary
|
||||
DARK_GREEN = RGBColor(0x00, 0x6E, 0x47)
|
||||
MID_GREEN = RGBColor(0x00, 0x99, 0x46)
|
||||
LIGHT_GREEN = RGBColor(0x7B, 0xC1, 0x43)
|
||||
PURPLE = RGBColor(0x96, 0x35, 0x8D)
|
||||
# Functional
|
||||
GREY = RGBColor(0x64, 0x74, 0x8B)
|
||||
|
||||
# Aliases used throughout
|
||||
NAVY = DEEP_BLUE
|
||||
TEAL = SKY_BLUE
|
||||
GREEN = DARK_GREEN
|
||||
|
||||
# Hex versions for matplotlib
|
||||
DEEP_BLUE_HEX = "#0b3254"
|
||||
SKY_BLUE_HEX = "#13b5ea"
|
||||
DARK_GREEN_HEX = "#006e47"
|
||||
MID_GREEN_HEX = "#009946"
|
||||
LIGHT_GREEN_HEX = "#7bc143"
|
||||
PURPLE_HEX = "#96358d"
|
||||
AMBER_HEX = "#d97706"
|
||||
RED_HEX = "#dc2626"
|
||||
|
||||
# Chart palette sequence per DESIGN.md
|
||||
CHART_PALETTE = [DEEP_BLUE_HEX, SKY_BLUE_HEX, DARK_GREEN_HEX, MID_GREEN_HEX,
|
||||
LIGHT_GREEN_HEX, PURPLE_HEX, AMBER_HEX, RED_HEX]
|
||||
|
||||
# PD comparison colours
|
||||
MA_HEX = DEEP_BLUE_HEX # PD1 = Deep Blue
|
||||
MG_HEX = SKY_BLUE_HEX # PD2 = Sky Blue
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# DATA LOADING & PREPARATION
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
def load_and_prepare(filepath, start_date, split_date):
|
||||
"""Load Excel, filter by date range, add PD column."""
|
||||
df = pd.read_excel(filepath)
|
||||
df["Event Date"] = pd.to_datetime(df["Event Date"])
|
||||
df = df[df["Event Date"] >= pd.Timestamp(start_date)].copy()
|
||||
df["Year"] = df["Event Date"].dt.year
|
||||
df["Month"] = df["Event Date"].dt.month
|
||||
df["MonthName"] = df["Event Date"].dt.strftime("%b")
|
||||
df["DOW"] = df["Event Date"].dt.day_name()
|
||||
df["YearMonth"] = df["Event Date"].dt.to_period("M")
|
||||
df["PD"] = df["Event Date"].apply(
|
||||
lambda x: "pd1" if x < pd.Timestamp(split_date) else "pd2"
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
def get_body_parts(series):
|
||||
"""Split multi-value body part entries and normalise."""
|
||||
parts = []
|
||||
for val in series.dropna():
|
||||
for part in str(val).split(","):
|
||||
part = part.strip()
|
||||
if part and "unspecified" not in part.lower():
|
||||
parts.append(part)
|
||||
return pd.Series(parts)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# CHART GENERATION
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
def _save(fig, path):
|
||||
fig.tight_layout()
|
||||
fig.savefig(path, dpi=200, bbox_inches="tight", facecolor="white")
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def _setup_chart_style():
|
||||
"""Configure matplotlib to use Source Sans Pro if available."""
|
||||
import matplotlib.font_manager as fm
|
||||
available = [f.name for f in fm.fontManager.ttflist]
|
||||
if "Source Sans Pro" in available:
|
||||
plt.rcParams["font.family"] = "Source Sans Pro"
|
||||
elif "Source Sans 3" in available:
|
||||
plt.rcParams["font.family"] = "Source Sans 3"
|
||||
else:
|
||||
plt.rcParams["font.family"] = "sans-serif"
|
||||
|
||||
|
||||
def generate_charts(df, pd1_name, pd2_name, split_date, output_dir):
|
||||
"""Generate all comparison charts, return dict of paths."""
|
||||
_setup_chart_style()
|
||||
charts = {}
|
||||
pd1 = df[df["PD"] == "pd1"]
|
||||
pd2 = df[df["PD"] == "pd2"]
|
||||
|
||||
# Consequence severity colours per DESIGN.md
|
||||
CONS_COLORS = [DARK_GREEN_HEX, AMBER_HEX, RED_HEX, PURPLE_HEX]
|
||||
|
||||
# 1. Monthly trend by PD
|
||||
fig, ax = plt.subplots(figsize=(10, 4))
|
||||
start_period = df["Event Date"].min().to_period("M")
|
||||
end_period = df["Event Date"].max().to_period("M")
|
||||
months_all = pd.period_range(start_period, end_period, freq="M")
|
||||
monthly = df.groupby(["YearMonth", "PD"]).size().unstack(fill_value=0).reindex(months_all, fill_value=0)
|
||||
x = range(len(months_all))
|
||||
labels = [m.strftime("%b %y") for m in months_all]
|
||||
ma_vals = monthly.get("pd1", pd.Series(0, index=months_all)).values
|
||||
mg_vals = monthly.get("pd2", pd.Series(0, index=months_all)).values
|
||||
ax.bar(x, ma_vals, color=MA_HEX, label=pd1_name, width=0.7, alpha=0.9)
|
||||
ax.bar(x, mg_vals, bottom=ma_vals, color=MG_HEX, label=pd2_name, width=0.7, alpha=0.9)
|
||||
split_m = pd.Timestamp(split_date).to_period("M")
|
||||
if split_m in months_all:
|
||||
trans_idx = list(months_all).index(split_m)
|
||||
ax.axvline(x=trans_idx - 0.5, color=RED_HEX, linestyle="--", linewidth=1.5, alpha=0.7)
|
||||
ax.text(trans_idx - 0.3, max(max(ma_vals + mg_vals), 1) * 0.95, "PD Transition",
|
||||
fontsize=9, color=RED_HEX, ha="left")
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
|
||||
ax.set_title("Monthly Events by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.set_ylabel("Events")
|
||||
ax.legend(loc="upper right")
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
p = os.path.join(output_dir, "monthly_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["monthly_by_pd"] = p
|
||||
|
||||
# 2. Event type comparison
|
||||
evt_types = df["Event Type"].value_counts().index[:8]
|
||||
ma_evt = pd1["Event Type"].value_counts().reindex(evt_types, fill_value=0)
|
||||
mg_evt = pd2["Event Type"].value_counts().reindex(evt_types, fill_value=0)
|
||||
fig, ax = plt.subplots(figsize=(9, 5))
|
||||
y = np.arange(len(evt_types))
|
||||
h = 0.35
|
||||
ax.barh(y - h / 2, ma_evt.values, h, label=pd1_name, color=MA_HEX)
|
||||
ax.barh(y + h / 2, mg_evt.values, h, label=pd2_name, color=MG_HEX)
|
||||
ax.set_yticks(y)
|
||||
ax.set_yticklabels(evt_types, fontsize=10)
|
||||
ax.invert_yaxis()
|
||||
ax.set_title("Event Types by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.legend()
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
for i, (v1, v2) in enumerate(zip(ma_evt.values, mg_evt.values)):
|
||||
ax.text(v1 + 0.2, i - h / 2, str(v1), va="center", fontsize=9, color=MA_HEX)
|
||||
ax.text(v2 + 0.2, i + h / 2, str(v2), va="center", fontsize=9, color=MG_HEX)
|
||||
p = os.path.join(output_dir, "event_type_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["event_type_by_pd"] = p
|
||||
|
||||
# 3. Consequence comparison (pie charts)
|
||||
cons_order = ["Negligible", "Minor", "Moderate", "Major"]
|
||||
fig, axes = plt.subplots(1, 2, figsize=(9, 3.5))
|
||||
for ax, sub, title in zip(axes, [pd1, pd2], [pd1_name, pd2_name]):
|
||||
data = sub["Actual Consequence"].value_counts().reindex(cons_order, fill_value=0)
|
||||
ax.pie(data.values, labels=cons_order, autopct="%1.0f%%", colors=CONS_COLORS, startangle=140,
|
||||
textprops={"fontsize": 9})
|
||||
ax.set_title(title, fontsize=13, fontweight="bold", color=MA_HEX)
|
||||
p = os.path.join(output_dir, "consequence_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["consequence_by_pd"] = p
|
||||
|
||||
# 4. Day of week
|
||||
dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
|
||||
fig, ax = plt.subplots(figsize=(9, 4))
|
||||
x_arr = np.arange(len(dow_order))
|
||||
w = 0.35
|
||||
ma_d = pd1["DOW"].value_counts().reindex(dow_order, fill_value=0)
|
||||
mg_d = pd2["DOW"].value_counts().reindex(dow_order, fill_value=0)
|
||||
b1 = ax.bar(x_arr - w / 2, ma_d.values, w, label=pd1_name, color=MA_HEX)
|
||||
b2 = ax.bar(x_arr + w / 2, mg_d.values, w, label=pd2_name, color=MG_HEX)
|
||||
ax.set_xticks(x_arr)
|
||||
ax.set_xticklabels([d[:3] for d in dow_order])
|
||||
ax.set_title("Events by Day of Week", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.legend()
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
for b in b1:
|
||||
if b.get_height() > 0:
|
||||
ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())),
|
||||
ha="center", fontsize=9)
|
||||
for b in b2:
|
||||
if b.get_height() > 0:
|
||||
ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())),
|
||||
ha="center", fontsize=9)
|
||||
p = os.path.join(output_dir, "dow_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["dow_by_pd"] = p
|
||||
|
||||
# 5. Root cause
|
||||
rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"]
|
||||
fig, ax = plt.subplots(figsize=(9, 4))
|
||||
y = np.arange(len(rc_cats))
|
||||
h = 0.35
|
||||
ma_rc = pd1["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0)
|
||||
mg_rc = pd2["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0)
|
||||
ax.barh(y - h / 2, ma_rc.values, h, label=pd1_name, color=MA_HEX)
|
||||
ax.barh(y + h / 2, mg_rc.values, h, label=pd2_name, color=MG_HEX)
|
||||
ax.set_yticks(y)
|
||||
ax.set_yticklabels(rc_cats, fontsize=10)
|
||||
ax.invert_yaxis()
|
||||
ax.set_title("Root Cause Categories by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.legend()
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
p = os.path.join(output_dir, "rootcause_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["rootcause_by_pd"] = p
|
||||
|
||||
# 6. CRP comparison
|
||||
crp_all = df["CRPInvolved"].value_counts()
|
||||
crp_active = crp_all[~crp_all.index.isin(["None Identified", "Under Investigation"])].head(8)
|
||||
crp_cats = crp_active.index
|
||||
fig, ax = plt.subplots(figsize=(9, 4.5))
|
||||
y = np.arange(len(crp_cats))
|
||||
ma_c = pd1["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0)
|
||||
mg_c = pd2["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0)
|
||||
ax.barh(y - h / 2, ma_c.values, h, label=pd1_name, color=MA_HEX)
|
||||
ax.barh(y + h / 2, mg_c.values, h, label=pd2_name, color=MG_HEX)
|
||||
ax.set_yticks(y)
|
||||
ax.set_yticklabels(crp_cats, fontsize=9)
|
||||
ax.invert_yaxis()
|
||||
ax.set_title("Critical Risk Protocols by Project Director", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.legend()
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
p = os.path.join(output_dir, "crp_by_pd.png")
|
||||
_save(fig, p)
|
||||
charts["crp_by_pd"] = p
|
||||
|
||||
# 7. Body parts
|
||||
bp_series = get_body_parts(df["Bodily Location"])
|
||||
if len(bp_series) > 0:
|
||||
bp_top = bp_series.value_counts().head(10)
|
||||
fig, ax = plt.subplots(figsize=(8, 4))
|
||||
ax.barh(range(len(bp_top)), bp_top.values, color=DARK_GREEN_HEX)
|
||||
ax.set_yticks(range(len(bp_top)))
|
||||
ax.set_yticklabels(bp_top.index, fontsize=10)
|
||||
ax.invert_yaxis()
|
||||
for i, v in enumerate(bp_top.values):
|
||||
ax.text(v + 0.1, i, str(v), va="center", fontsize=11, fontweight="bold")
|
||||
ax.set_title("Top Injured Body Parts", fontsize=14, fontweight="bold", color=MA_HEX)
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
p = os.path.join(output_dir, "body_parts.png")
|
||||
_save(fig, p)
|
||||
charts["body_parts"] = p
|
||||
|
||||
return charts
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# DOCX GENERATION
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
def _set_cell_shading(cell, color_hex):
|
||||
"""Apply background shading to a table cell."""
|
||||
shading = parse_xml(f'<w:shd {nsdecls("w")} w:fill="{color_hex}" w:val="clear"/>')
|
||||
cell._tc.get_or_add_tcPr().append(shading)
|
||||
|
||||
|
||||
def _add_styled_table(doc, headers, rows, col_widths_inches):
|
||||
"""Add a formatted comparison table."""
|
||||
table = doc.add_table(rows=1 + len(rows), cols=len(headers))
|
||||
table.alignment = WD_TABLE_ALIGNMENT.LEFT
|
||||
table.style = "Table Grid"
|
||||
|
||||
# Header row
|
||||
for i, h in enumerate(headers):
|
||||
cell = table.rows[0].cells[i]
|
||||
cell.text = ""
|
||||
p = cell.paragraphs[0]
|
||||
run = p.add_run(h)
|
||||
run.bold = True
|
||||
run.font.size = Pt(9)
|
||||
run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
|
||||
run.font.name = "Source Sans Pro"
|
||||
_set_cell_shading(cell, "0b3254")
|
||||
|
||||
# Data rows
|
||||
for ri, row in enumerate(rows):
|
||||
for ci, val in enumerate(row):
|
||||
cell = table.rows[ri + 1].cells[ci]
|
||||
cell.text = ""
|
||||
p = cell.paragraphs[0]
|
||||
run = p.add_run(str(val))
|
||||
run.font.size = Pt(9)
|
||||
run.font.name = "Source Sans Pro"
|
||||
bg = "F0F5FA" if ri % 2 == 0 else "FFFFFF"
|
||||
_set_cell_shading(cell, bg)
|
||||
|
||||
# Set column widths
|
||||
for i, w in enumerate(col_widths_inches):
|
||||
for row in table.rows:
|
||||
row.cells[i].width = Inches(w)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir):
|
||||
"""Generate the full DOCX report."""
|
||||
doc = Document()
|
||||
|
||||
# Set default font
|
||||
style = doc.styles["Normal"]
|
||||
style.font.name = "Source Sans Pro"
|
||||
style.font.size = Pt(11)
|
||||
|
||||
# Heading styles
|
||||
for level, size, color in [(1, 16, NAVY), (2, 13, TEAL)]:
|
||||
hs = doc.styles[f"Heading {level}"]
|
||||
hs.font.name = "Source Sans Pro"
|
||||
hs.font.size = Pt(size)
|
||||
hs.font.color.rgb = color
|
||||
hs.font.bold = True
|
||||
|
||||
pd1 = df[df["PD"] == "pd1"]
|
||||
pd2 = df[df["PD"] == "pd2"]
|
||||
total = len(df)
|
||||
pd1_months = max(1, (pd.Timestamp(split_date) - df["Event Date"].min()).days / 30.44)
|
||||
pd2_months = max(1, (df["Event Date"].max() - pd.Timestamp(split_date)).days / 30.44 + 1)
|
||||
|
||||
pd1_start = pd1["Event Date"].min().strftime("%b %Y") if len(pd1) > 0 else "N/A"
|
||||
pd1_end = pd1["Event Date"].max().strftime("%b %Y") if len(pd1) > 0 else "N/A"
|
||||
pd2_start = pd2["Event Date"].min().strftime("%b %Y") if len(pd2) > 0 else "N/A"
|
||||
pd2_end = pd2["Event Date"].max().strftime("%b %Y") if len(pd2) > 0 else "N/A"
|
||||
|
||||
# ── Title page ──
|
||||
doc.add_paragraph("")
|
||||
doc.add_paragraph("")
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run("SHEQ Incident Analysis")
|
||||
run.font.size = Pt(28)
|
||||
run.bold = True
|
||||
run.font.name = "Source Sans Pro"
|
||||
run.font.color.rgb = NAVY
|
||||
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run("Far North Waters Project")
|
||||
run.font.size = Pt(16)
|
||||
run.font.name = "Source Sans Pro"
|
||||
run.font.color.rgb = TEAL
|
||||
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run(f"{pd1_start} \u2013 {pd2_end} (MTD)")
|
||||
run.font.size = Pt(14)
|
||||
run.font.name = "Source Sans Pro"
|
||||
run.font.color.rgb = TEAL
|
||||
|
||||
doc.add_paragraph("")
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run("Performance by Project Director")
|
||||
run.font.size = Pt(13)
|
||||
run.bold = True
|
||||
run.font.name = "Source Sans Pro"
|
||||
run.font.color.rgb = NAVY
|
||||
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run(f"{pd1_name} ")
|
||||
run.bold = True
|
||||
run.font.color.rgb = NAVY
|
||||
run = p.add_run(f"({pd1_start} \u2013 {pd1_end}) | ")
|
||||
run.font.color.rgb = GREY
|
||||
run = p.add_run(f"{pd2_name} ")
|
||||
run.bold = True
|
||||
run.font.color.rgb = TEAL
|
||||
run = p.add_run(f"({pd2_start} \u2013 {pd2_end})")
|
||||
run.font.color.rgb = GREY
|
||||
|
||||
doc.add_paragraph("")
|
||||
p = doc.add_paragraph()
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run = p.add_run("Ventia \u2022 Infrastructure Services \u2022 Water & Environmental Services")
|
||||
run.font.size = Pt(10)
|
||||
run.font.color.rgb = GREY
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ── Helper functions ──
|
||||
def h1(text):
|
||||
doc.add_heading(text, level=1)
|
||||
|
||||
def h2(text):
|
||||
doc.add_heading(text, level=2)
|
||||
|
||||
def text(t, bold=False):
|
||||
p = doc.add_paragraph()
|
||||
run = p.add_run(t)
|
||||
run.bold = bold
|
||||
return p
|
||||
|
||||
def bullet(t):
|
||||
p = doc.add_paragraph(t, style="List Bullet")
|
||||
return p
|
||||
|
||||
def add_chart(name, width=5.5):
|
||||
if name in charts:
|
||||
doc.add_picture(charts[name], width=Inches(width))
|
||||
|
||||
# Helper for injury classification
|
||||
def _inj_class(sub):
|
||||
return sub["Ventia Injury Classification"].value_counts()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 1. EXECUTIVE SUMMARY
|
||||
# ═══════════════════════════════════════════
|
||||
h1("1. Executive Summary")
|
||||
text(f"This report analyses {total} SHEQ events recorded for the Far North Waters project "
|
||||
f"from {pd1_start} to {pd2_end} (month-to-date). The analysis is structured around "
|
||||
f"two Project Director tenures to enable performance comparison:")
|
||||
|
||||
pd1_inj = pd1[pd1["Event Type"] == "Injury/Illness Sustained"]
|
||||
pd2_inj = pd2[pd2["Event Type"] == "Injury/Illness Sustained"]
|
||||
pd1_mv = pd1[pd1["Event Type"] == "Motor Vehicle"]
|
||||
pd2_mv = pd2[pd2["Event Type"] == "Motor Vehicle"]
|
||||
pd1_ic = _inj_class(pd1)
|
||||
pd2_ic = _inj_class(pd2)
|
||||
pd1_cc = len(pd1[pd1["Event Type"] == "Close Call"])
|
||||
pd2_cc = len(pd2[pd2["Event Type"] == "Close Call"])
|
||||
pd1_mod = len(pd1[pd1["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])])
|
||||
pd2_mod = len(pd2[pd2["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])])
|
||||
|
||||
_add_styled_table(doc,
|
||||
["", pd1_name, pd2_name],
|
||||
[
|
||||
["Period", f"{pd1_start} \u2013 {pd1_end}", f"{pd2_start} \u2013 {pd2_end}"],
|
||||
["Duration", f"{pd1_months:.0f} months", f"{pd2_months:.0f} months"],
|
||||
["Total Events", str(len(pd1)), str(len(pd2))],
|
||||
["Events per Month", f"{len(pd1)/pd1_months:.1f}", f"{len(pd2)/pd2_months:.1f}"],
|
||||
["Injuries", f"{len(pd1_inj)} ({len(pd1_inj)/max(len(pd1),1)*100:.1f}%)",
|
||||
f"{len(pd2_inj)} ({len(pd2_inj)/max(len(pd2),1)*100:.1f}%)"],
|
||||
["Motor Vehicle Events", f"{len(pd1_mv)} ({len(pd1_mv)/max(len(pd1),1)*100:.1f}%)",
|
||||
f"{len(pd2_mv)} ({len(pd2_mv)/max(len(pd2),1)*100:.1f}%)"],
|
||||
["Lost Time Injuries", str(pd1_ic.get("Lost Time Injury", 0)), str(pd2_ic.get("Lost Time Injury", 0))],
|
||||
["First Aid Treatments", str(pd1_ic.get("First Aid Treatment", 0)), str(pd2_ic.get("First Aid Treatment", 0))],
|
||||
["Close Calls", f"{pd1_cc} ({pd1_cc/max(len(pd1),1)*100:.1f}%)",
|
||||
f"{pd2_cc} ({pd2_cc/max(len(pd2),1)*100:.1f}%)"],
|
||||
["Moderate+ Consequence", f"{pd1_mod} ({pd1_mod/max(len(pd1),1)*100:.1f}%)",
|
||||
f"{pd2_mod} ({pd2_mod/max(len(pd2),1)*100:.1f}%)"],
|
||||
["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}",
|
||||
f"{pd2['Days to Investigate'].dropna().median():.0f}"],
|
||||
["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}",
|
||||
f"{pd2['Days to Close'].dropna().median():.0f}"],
|
||||
],
|
||||
[2.0, 2.2, 2.3]
|
||||
)
|
||||
|
||||
doc.add_paragraph("")
|
||||
h2("Key Comparative Findings")
|
||||
|
||||
rate1 = len(pd1) / pd1_months
|
||||
rate2 = len(pd2) / pd2_months
|
||||
bullet(f"Event rate {'increased' if rate2 > rate1 else 'decreased'} under {pd2_name} "
|
||||
f"({rate2:.1f}/month vs {rate1:.1f}/month), with Moderate+ consequences at "
|
||||
f"{pd2_mod/max(len(pd2),1)*100:.1f}% vs {pd1_mod/max(len(pd1),1)*100:.1f}%.")
|
||||
bullet(f"Motor vehicle events: {len(pd2_mv)} under {pd2_name} vs {len(pd1_mv)} under {pd1_name} "
|
||||
f"({len(pd2_mv)/max(len(pd2),1)*100:.1f}% vs {len(pd1_mv)/max(len(pd1),1)*100:.1f}%).")
|
||||
bullet(f"Close call reporting: {pd2_cc/max(len(pd2),1)*100:.1f}% under {pd2_name} vs "
|
||||
f"{pd1_cc/max(len(pd1),1)*100:.1f}% under {pd1_name}.")
|
||||
|
||||
lti1 = pd1_ic.get("Lost Time Injury", 0)
|
||||
lti2 = pd2_ic.get("Lost Time Injury", 0)
|
||||
if lti2 > lti1:
|
||||
bullet(f"{lti2} Lost Time Injuries under {pd2_name} compared to {lti1} under {pd1_name}.")
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 2. MONTHLY TRENDS
|
||||
# ═══════════════════════════════════════════
|
||||
h1("2. Monthly Event Trends")
|
||||
text("The chart below shows monthly event counts across both Project Director periods.")
|
||||
add_chart("monthly_by_pd", 5.8)
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 3. EVENT TYPE COMPARISON
|
||||
# ═══════════════════════════════════════════
|
||||
h1("3. Event Type Comparison")
|
||||
add_chart("event_type_by_pd", 5.5)
|
||||
|
||||
evt_types = df["Event Type"].value_counts().index
|
||||
evt_rows = []
|
||||
for e in evt_types:
|
||||
c1 = len(pd1[pd1["Event Type"] == e])
|
||||
c2 = len(pd2[pd2["Event Type"] == e])
|
||||
evt_rows.append([e, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%",
|
||||
str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"])
|
||||
_add_styled_table(doc, ["Event Type", pd1_name, "%", pd2_name, "%"], evt_rows,
|
||||
[2.0, 1.1, 0.8, 1.0, 0.8])
|
||||
|
||||
doc.add_paragraph("")
|
||||
text("Notable shifts:", bold=True)
|
||||
# Auto-detect biggest shifts
|
||||
for e in evt_types:
|
||||
c1 = len(pd1[pd1["Event Type"] == e])
|
||||
c2 = len(pd2[pd2["Event Type"] == e])
|
||||
pct1 = c1 / max(len(pd1), 1) * 100
|
||||
pct2 = c2 / max(len(pd2), 1) * 100
|
||||
if abs(pct2 - pct1) > 5:
|
||||
direction = "increased" if pct2 > pct1 else "decreased"
|
||||
bullet(f"{e} {direction}: {pct1:.1f}% \u2192 {pct2:.1f}% ({c1} \u2192 {c2} events).")
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 4. INJURY ANALYSIS
|
||||
# ═══════════════════════════════════════════
|
||||
h1("4. Injury Analysis")
|
||||
h2("4.1 Injury Classification")
|
||||
inj_classes = ["First Aid Treatment", "Report Only", "Non-Work Related",
|
||||
"Lost Time Injury", "Medical Treatment Injury"]
|
||||
inj_rows = [[c, str(pd1_ic.get(c, 0)), str(pd2_ic.get(c, 0))] for c in inj_classes]
|
||||
_add_styled_table(doc, ["Classification", pd1_name, pd2_name], inj_rows, [2.5, 1.8, 1.8])
|
||||
|
||||
h2("4.2 Body Parts Injured")
|
||||
add_chart("body_parts", 5.0)
|
||||
|
||||
# Body part comparison
|
||||
bp1 = get_body_parts(pd1["Bodily Location"]).value_counts().head(6)
|
||||
bp2 = get_body_parts(pd2["Bodily Location"]).value_counts().head(6)
|
||||
all_bp = list(dict.fromkeys(list(bp1.index) + list(bp2.index)))[:8]
|
||||
bp_rows = [[bp, str(bp1.get(bp, 0)), str(bp2.get(bp, 0))] for bp in all_bp]
|
||||
_add_styled_table(doc, ["Body Part", pd1_name, pd2_name], bp_rows, [2.5, 1.8, 1.8])
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 5. CONSEQUENCE ANALYSIS
|
||||
# ═══════════════════════════════════════════
|
||||
h1("5. Consequence Analysis")
|
||||
add_chart("consequence_by_pd", 5.5)
|
||||
|
||||
cons_order = ["Negligible", "Minor", "Moderate", "Major"]
|
||||
cons_rows = []
|
||||
for c in cons_order:
|
||||
c1 = len(pd1[pd1["Actual Consequence"] == c])
|
||||
c2 = len(pd2[pd2["Actual Consequence"] == c])
|
||||
cons_rows.append([c, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%",
|
||||
str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"])
|
||||
_add_styled_table(doc, ["Consequence", pd1_name, "%", pd2_name, "%"], cons_rows,
|
||||
[1.5, 1.0, 0.8, 1.0, 0.8])
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 6. CRP & ROOT CAUSE
|
||||
# ═══════════════════════════════════════════
|
||||
h1("6. Critical Risk Protocols & Root Causes")
|
||||
h2("6.1 CRP Comparison")
|
||||
add_chart("crp_by_pd", 5.5)
|
||||
|
||||
h2("6.2 Root Cause Comparison")
|
||||
add_chart("rootcause_by_pd", 5.5)
|
||||
|
||||
rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"]
|
||||
rc_rows = []
|
||||
for r in rc_cats:
|
||||
c1 = len(pd1[pd1["Root Cause Category"] == r])
|
||||
c2 = len(pd2[pd2["Root Cause Category"] == r])
|
||||
t1 = pd1["Root Cause Category"].notna().sum()
|
||||
t2 = pd2["Root Cause Category"].notna().sum()
|
||||
rc_rows.append([r, str(c1), f"{c1/max(t1,1)*100:.1f}%",
|
||||
str(c2), f"{c2/max(t2,1)*100:.1f}%"])
|
||||
_add_styled_table(doc, ["Root Cause", pd1_name, "%", pd2_name, "%"], rc_rows,
|
||||
[2.0, 1.1, 0.8, 1.0, 0.8])
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 7. TIMING PATTERNS
|
||||
# ═══════════════════════════════════════════
|
||||
h1("7. Timing Patterns")
|
||||
add_chart("dow_by_pd", 5.5)
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 8. INVESTIGATION PERFORMANCE
|
||||
# ═══════════════════════════════════════════
|
||||
h1("8. Investigation Performance")
|
||||
inv_rows = [
|
||||
["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}",
|
||||
f"{pd2['Days to Investigate'].dropna().median():.0f}"],
|
||||
["Mean Days to Investigate", f"{pd1['Days to Investigate'].dropna().mean():.1f}",
|
||||
f"{pd2['Days to Investigate'].dropna().mean():.1f}"],
|
||||
["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}",
|
||||
f"{pd2['Days to Close'].dropna().median():.0f}"],
|
||||
["Mean Days to Close", f"{pd1['Days to Close'].dropna().mean():.1f}",
|
||||
f"{pd2['Days to Close'].dropna().mean():.1f}"],
|
||||
["Events Closed", f"{(pd1['Status']=='Closed').sum()} ({(pd1['Status']=='Closed').sum()/max(len(pd1),1)*100:.0f}%)",
|
||||
f"{(pd2['Status']=='Closed').sum()} ({(pd2['Status']=='Closed').sum()/max(len(pd2),1)*100:.0f}%)"],
|
||||
["Events Open", str((pd1["Status"] == "Open").sum()), str((pd2["Status"] == "Open").sum())],
|
||||
]
|
||||
_add_styled_table(doc, ["Metric", pd1_name, pd2_name], inv_rows, [2.5, 1.8, 1.8])
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# 9. RECOMMENDATIONS
|
||||
# ═══════════════════════════════════════════
|
||||
h1("9. Key Findings & Recommendations")
|
||||
|
||||
h2(f"9.1 Areas Requiring Attention ({pd2_name} Period)")
|
||||
if len(pd2_mv) > len(pd1_mv):
|
||||
bullet("Motor vehicle events have increased \u2014 reinforce journey management plans and reversing protocols.")
|
||||
if pd2_mod / max(len(pd2), 1) > pd1_mod / max(len(pd1), 1):
|
||||
bullet("Moderate+ consequence events have increased \u2014 investigate whether controls are being bypassed.")
|
||||
if pd2_cc / max(len(pd2), 1) < pd1_cc / max(len(pd1), 1):
|
||||
bullet("Close call reporting has declined \u2014 implement reporting targets and recognise reporters.")
|
||||
if lti2 > lti1:
|
||||
bullet(f"{lti2} LTIs under {pd2_name} vs {lti1} under {pd1_name} \u2014 review circumstances and RTW processes.")
|
||||
|
||||
h2("9.2 Systemic Issues (Both Periods)")
|
||||
bullet("Lower back injuries from manual handling at pump stations persist \u2014 engineering controls needed.")
|
||||
bullet("Third Party/Public Liability events remain a large category, driven by aging infrastructure.")
|
||||
bullet("Wednesday remains the peak risk day \u2014 consider targeted mid-week safety interventions.")
|
||||
|
||||
h2("9.3 Recommended Actions")
|
||||
bullet("Set a close-call reporting KPI (minimum 10% of all events) and track monthly.")
|
||||
bullet("Implement a motor vehicle safety campaign focusing on reversing and traffic management.")
|
||||
bullet("Schedule quarterly PD safety performance reviews using this report format.")
|
||||
|
||||
# ── Save ──
|
||||
output_path = os.path.join(output_dir, "SHEQ_PD_Comparison.docx")
|
||||
doc.save(output_path)
|
||||
return output_path
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# MAIN ENTRY POINT
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
def run_analysis(filepath, start_date, split_date, pd1_name, pd2_name, output_dir="output"):
|
||||
"""Run the full analysis pipeline."""
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
print(f"Loading data from {filepath}...")
|
||||
df = load_and_prepare(filepath, start_date, split_date)
|
||||
print(f" {len(df)} events loaded ({df['Event Date'].min().date()} to {df['Event Date'].max().date()})")
|
||||
print(f" {pd1_name}: {(df['PD']=='pd1').sum()} events")
|
||||
print(f" {pd2_name}: {(df['PD']=='pd2').sum()} events")
|
||||
|
||||
print("Generating charts...")
|
||||
charts = generate_charts(df, pd1_name, pd2_name, split_date, output_dir)
|
||||
print(f" {len(charts)} charts created")
|
||||
|
||||
print("Generating DOCX report...")
|
||||
docx_path = generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir)
|
||||
print(f" Report saved to {docx_path}")
|
||||
|
||||
return docx_path
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_analysis(
|
||||
filepath="All_Events__5_.xlsx",
|
||||
start_date="2024-01-01",
|
||||
split_date="2025-04-01",
|
||||
pd1_name="Matthew Arthur",
|
||||
pd2_name="Manga",
|
||||
output_dir="output"
|
||||
)