""" SHEQ Incident Analysis Engine Generates charts and a DOCX report comparing two Project Director periods. Usage: from analysis import run_analysis run_analysis("All_Events__5_.xlsx", "2024-01-01", "2025-04-01", "Matthew Arthur", "Manga", output_dir="output") """ import os import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np from docx import Document from docx.shared import Inches, Pt, Cm, RGBColor, Emu from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.enum.table import WD_TABLE_ALIGNMENT from docx.oxml.ns import qn, nsdecls from docx.oxml import parse_xml from io import BytesIO # ── Brand Colours (see DESIGN.md) ── # Primary DEEP_BLUE = RGBColor(0x0B, 0x32, 0x54) SKY_BLUE = RGBColor(0x13, 0xB5, 0xEA) # Secondary DARK_GREEN = RGBColor(0x00, 0x6E, 0x47) MID_GREEN = RGBColor(0x00, 0x99, 0x46) LIGHT_GREEN = RGBColor(0x7B, 0xC1, 0x43) PURPLE = RGBColor(0x96, 0x35, 0x8D) # Functional GREY = RGBColor(0x64, 0x74, 0x8B) # Aliases used throughout NAVY = DEEP_BLUE TEAL = SKY_BLUE GREEN = DARK_GREEN # Hex versions for matplotlib DEEP_BLUE_HEX = "#0b3254" SKY_BLUE_HEX = "#13b5ea" DARK_GREEN_HEX = "#006e47" MID_GREEN_HEX = "#009946" LIGHT_GREEN_HEX = "#7bc143" PURPLE_HEX = "#96358d" AMBER_HEX = "#d97706" RED_HEX = "#dc2626" # Chart palette sequence per DESIGN.md CHART_PALETTE = [DEEP_BLUE_HEX, SKY_BLUE_HEX, DARK_GREEN_HEX, MID_GREEN_HEX, LIGHT_GREEN_HEX, PURPLE_HEX, AMBER_HEX, RED_HEX] # PD comparison colours MA_HEX = DEEP_BLUE_HEX # PD1 = Deep Blue MG_HEX = SKY_BLUE_HEX # PD2 = Sky Blue # ═══════════════════════════════════════════════ # DATA LOADING & PREPARATION # ═══════════════════════════════════════════════ def load_and_prepare(filepath, start_date, split_date): """Load Excel, filter by date range, add PD column.""" df = pd.read_excel(filepath) df["Event Date"] = pd.to_datetime(df["Event Date"]) df = df[df["Event Date"] >= pd.Timestamp(start_date)].copy() df["Year"] = df["Event Date"].dt.year df["Month"] = df["Event Date"].dt.month df["MonthName"] = df["Event Date"].dt.strftime("%b") df["DOW"] = df["Event Date"].dt.day_name() df["YearMonth"] = df["Event Date"].dt.to_period("M") df["PD"] = df["Event Date"].apply( lambda x: "pd1" if x < pd.Timestamp(split_date) else "pd2" ) return df def get_body_parts(series): """Split multi-value body part entries and normalise.""" parts = [] for val in series.dropna(): for part in str(val).split(","): part = part.strip() if part and "unspecified" not in part.lower(): parts.append(part) return pd.Series(parts) # ═══════════════════════════════════════════════ # CHART GENERATION # ═══════════════════════════════════════════════ def _save(fig, path): fig.tight_layout() fig.savefig(path, dpi=200, bbox_inches="tight", facecolor="white") plt.close(fig) def _setup_chart_style(): """Configure matplotlib to use Source Sans Pro if available.""" import matplotlib.font_manager as fm available = [f.name for f in fm.fontManager.ttflist] if "Source Sans Pro" in available: plt.rcParams["font.family"] = "Source Sans Pro" elif "Source Sans 3" in available: plt.rcParams["font.family"] = "Source Sans 3" else: plt.rcParams["font.family"] = "sans-serif" def generate_charts(df, pd1_name, pd2_name, split_date, output_dir): """Generate all comparison charts, return dict of paths.""" _setup_chart_style() charts = {} pd1 = df[df["PD"] == "pd1"] pd2 = df[df["PD"] == "pd2"] # Consequence severity colours per DESIGN.md CONS_COLORS = [DARK_GREEN_HEX, AMBER_HEX, RED_HEX, PURPLE_HEX] # 1. Monthly trend by PD fig, ax = plt.subplots(figsize=(10, 4)) start_period = df["Event Date"].min().to_period("M") end_period = df["Event Date"].max().to_period("M") months_all = pd.period_range(start_period, end_period, freq="M") monthly = df.groupby(["YearMonth", "PD"]).size().unstack(fill_value=0).reindex(months_all, fill_value=0) x = range(len(months_all)) labels = [m.strftime("%b %y") for m in months_all] ma_vals = monthly.get("pd1", pd.Series(0, index=months_all)).values mg_vals = monthly.get("pd2", pd.Series(0, index=months_all)).values ax.bar(x, ma_vals, color=MA_HEX, label=pd1_name, width=0.7, alpha=0.9) ax.bar(x, mg_vals, bottom=ma_vals, color=MG_HEX, label=pd2_name, width=0.7, alpha=0.9) split_m = pd.Timestamp(split_date).to_period("M") if split_m in months_all: trans_idx = list(months_all).index(split_m) ax.axvline(x=trans_idx - 0.5, color=RED_HEX, linestyle="--", linewidth=1.5, alpha=0.7) ax.text(trans_idx - 0.3, max(max(ma_vals + mg_vals), 1) * 0.95, "PD Transition", fontsize=9, color=RED_HEX, ha="left") ax.set_xticks(x) ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8) ax.set_title("Monthly Events by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) ax.set_ylabel("Events") ax.legend(loc="upper right") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) p = os.path.join(output_dir, "monthly_by_pd.png") _save(fig, p) charts["monthly_by_pd"] = p # 2. Event type comparison evt_types = df["Event Type"].value_counts().index[:8] ma_evt = pd1["Event Type"].value_counts().reindex(evt_types, fill_value=0) mg_evt = pd2["Event Type"].value_counts().reindex(evt_types, fill_value=0) fig, ax = plt.subplots(figsize=(9, 5)) y = np.arange(len(evt_types)) h = 0.35 ax.barh(y - h / 2, ma_evt.values, h, label=pd1_name, color=MA_HEX) ax.barh(y + h / 2, mg_evt.values, h, label=pd2_name, color=MG_HEX) ax.set_yticks(y) ax.set_yticklabels(evt_types, fontsize=10) ax.invert_yaxis() ax.set_title("Event Types by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) ax.legend() ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) for i, (v1, v2) in enumerate(zip(ma_evt.values, mg_evt.values)): ax.text(v1 + 0.2, i - h / 2, str(v1), va="center", fontsize=9, color=MA_HEX) ax.text(v2 + 0.2, i + h / 2, str(v2), va="center", fontsize=9, color=MG_HEX) p = os.path.join(output_dir, "event_type_by_pd.png") _save(fig, p) charts["event_type_by_pd"] = p # 3. Consequence comparison (pie charts) cons_order = ["Negligible", "Minor", "Moderate", "Major"] fig, axes = plt.subplots(1, 2, figsize=(9, 3.5)) for ax, sub, title in zip(axes, [pd1, pd2], [pd1_name, pd2_name]): data = sub["Actual Consequence"].value_counts().reindex(cons_order, fill_value=0) ax.pie(data.values, labels=cons_order, autopct="%1.0f%%", colors=CONS_COLORS, startangle=140, textprops={"fontsize": 9}) ax.set_title(title, fontsize=13, fontweight="bold", color=MA_HEX) p = os.path.join(output_dir, "consequence_by_pd.png") _save(fig, p) charts["consequence_by_pd"] = p # 4. Day of week dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] fig, ax = plt.subplots(figsize=(9, 4)) x_arr = np.arange(len(dow_order)) w = 0.35 ma_d = pd1["DOW"].value_counts().reindex(dow_order, fill_value=0) mg_d = pd2["DOW"].value_counts().reindex(dow_order, fill_value=0) b1 = ax.bar(x_arr - w / 2, ma_d.values, w, label=pd1_name, color=MA_HEX) b2 = ax.bar(x_arr + w / 2, mg_d.values, w, label=pd2_name, color=MG_HEX) ax.set_xticks(x_arr) ax.set_xticklabels([d[:3] for d in dow_order]) ax.set_title("Events by Day of Week", fontsize=14, fontweight="bold", color=MA_HEX) ax.legend() ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) for b in b1: if b.get_height() > 0: ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())), ha="center", fontsize=9) for b in b2: if b.get_height() > 0: ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())), ha="center", fontsize=9) p = os.path.join(output_dir, "dow_by_pd.png") _save(fig, p) charts["dow_by_pd"] = p # 5. Root cause rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"] fig, ax = plt.subplots(figsize=(9, 4)) y = np.arange(len(rc_cats)) h = 0.35 ma_rc = pd1["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0) mg_rc = pd2["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0) ax.barh(y - h / 2, ma_rc.values, h, label=pd1_name, color=MA_HEX) ax.barh(y + h / 2, mg_rc.values, h, label=pd2_name, color=MG_HEX) ax.set_yticks(y) ax.set_yticklabels(rc_cats, fontsize=10) ax.invert_yaxis() ax.set_title("Root Cause Categories by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) ax.legend() ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) p = os.path.join(output_dir, "rootcause_by_pd.png") _save(fig, p) charts["rootcause_by_pd"] = p # 6. CRP comparison crp_all = df["CRPInvolved"].value_counts() crp_active = crp_all[~crp_all.index.isin(["None Identified", "Under Investigation"])].head(8) crp_cats = crp_active.index fig, ax = plt.subplots(figsize=(9, 4.5)) y = np.arange(len(crp_cats)) ma_c = pd1["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0) mg_c = pd2["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0) ax.barh(y - h / 2, ma_c.values, h, label=pd1_name, color=MA_HEX) ax.barh(y + h / 2, mg_c.values, h, label=pd2_name, color=MG_HEX) ax.set_yticks(y) ax.set_yticklabels(crp_cats, fontsize=9) ax.invert_yaxis() ax.set_title("Critical Risk Protocols by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) ax.legend() ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) p = os.path.join(output_dir, "crp_by_pd.png") _save(fig, p) charts["crp_by_pd"] = p # 7. Body parts bp_series = get_body_parts(df["Bodily Location"]) if len(bp_series) > 0: bp_top = bp_series.value_counts().head(10) fig, ax = plt.subplots(figsize=(8, 4)) ax.barh(range(len(bp_top)), bp_top.values, color=DARK_GREEN_HEX) ax.set_yticks(range(len(bp_top))) ax.set_yticklabels(bp_top.index, fontsize=10) ax.invert_yaxis() for i, v in enumerate(bp_top.values): ax.text(v + 0.1, i, str(v), va="center", fontsize=11, fontweight="bold") ax.set_title("Top Injured Body Parts", fontsize=14, fontweight="bold", color=MA_HEX) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) p = os.path.join(output_dir, "body_parts.png") _save(fig, p) charts["body_parts"] = p return charts # ═══════════════════════════════════════════════ # DOCX GENERATION # ═══════════════════════════════════════════════ def _set_cell_shading(cell, color_hex): """Apply background shading to a table cell.""" shading = parse_xml(f'') cell._tc.get_or_add_tcPr().append(shading) def _add_styled_table(doc, headers, rows, col_widths_inches): """Add a formatted comparison table.""" table = doc.add_table(rows=1 + len(rows), cols=len(headers)) table.alignment = WD_TABLE_ALIGNMENT.LEFT table.style = "Table Grid" # Header row for i, h in enumerate(headers): cell = table.rows[0].cells[i] cell.text = "" p = cell.paragraphs[0] run = p.add_run(h) run.bold = True run.font.size = Pt(9) run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) run.font.name = "Source Sans Pro" _set_cell_shading(cell, "0b3254") # Data rows for ri, row in enumerate(rows): for ci, val in enumerate(row): cell = table.rows[ri + 1].cells[ci] cell.text = "" p = cell.paragraphs[0] run = p.add_run(str(val)) run.font.size = Pt(9) run.font.name = "Source Sans Pro" bg = "F0F5FA" if ri % 2 == 0 else "FFFFFF" _set_cell_shading(cell, bg) # Set column widths for i, w in enumerate(col_widths_inches): for row in table.rows: row.cells[i].width = Inches(w) return table def generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir): """Generate the full DOCX report.""" doc = Document() # Set default font style = doc.styles["Normal"] style.font.name = "Source Sans Pro" style.font.size = Pt(11) # Heading styles for level, size, color in [(1, 16, NAVY), (2, 13, TEAL)]: hs = doc.styles[f"Heading {level}"] hs.font.name = "Source Sans Pro" hs.font.size = Pt(size) hs.font.color.rgb = color hs.font.bold = True pd1 = df[df["PD"] == "pd1"] pd2 = df[df["PD"] == "pd2"] total = len(df) pd1_months = max(1, (pd.Timestamp(split_date) - df["Event Date"].min()).days / 30.44) pd2_months = max(1, (df["Event Date"].max() - pd.Timestamp(split_date)).days / 30.44 + 1) pd1_start = pd1["Event Date"].min().strftime("%b %Y") if len(pd1) > 0 else "N/A" pd1_end = pd1["Event Date"].max().strftime("%b %Y") if len(pd1) > 0 else "N/A" pd2_start = pd2["Event Date"].min().strftime("%b %Y") if len(pd2) > 0 else "N/A" pd2_end = pd2["Event Date"].max().strftime("%b %Y") if len(pd2) > 0 else "N/A" # ── Title page ── doc.add_paragraph("") doc.add_paragraph("") p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run("SHEQ Incident Analysis") run.font.size = Pt(28) run.bold = True run.font.name = "Source Sans Pro" run.font.color.rgb = NAVY p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run("Far North Waters Project") run.font.size = Pt(16) run.font.name = "Source Sans Pro" run.font.color.rgb = TEAL p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run(f"{pd1_start} \u2013 {pd2_end} (MTD)") run.font.size = Pt(14) run.font.name = "Source Sans Pro" run.font.color.rgb = TEAL doc.add_paragraph("") p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run("Performance by Project Director") run.font.size = Pt(13) run.bold = True run.font.name = "Source Sans Pro" run.font.color.rgb = NAVY p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run(f"{pd1_name} ") run.bold = True run.font.color.rgb = NAVY run = p.add_run(f"({pd1_start} \u2013 {pd1_end}) | ") run.font.color.rgb = GREY run = p.add_run(f"{pd2_name} ") run.bold = True run.font.color.rgb = TEAL run = p.add_run(f"({pd2_start} \u2013 {pd2_end})") run.font.color.rgb = GREY doc.add_paragraph("") p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run("Ventia \u2022 Infrastructure Services \u2022 Water & Environmental Services") run.font.size = Pt(10) run.font.color.rgb = GREY doc.add_page_break() # ── Helper functions ── def h1(text): doc.add_heading(text, level=1) def h2(text): doc.add_heading(text, level=2) def text(t, bold=False): p = doc.add_paragraph() run = p.add_run(t) run.bold = bold return p def bullet(t): p = doc.add_paragraph(t, style="List Bullet") return p def add_chart(name, width=5.5): if name in charts: doc.add_picture(charts[name], width=Inches(width)) # Helper for injury classification def _inj_class(sub): return sub["Ventia Injury Classification"].value_counts() # ═══════════════════════════════════════════ # 1. EXECUTIVE SUMMARY # ═══════════════════════════════════════════ h1("1. Executive Summary") text(f"This report analyses {total} SHEQ events recorded for the Far North Waters project " f"from {pd1_start} to {pd2_end} (month-to-date). The analysis is structured around " f"two Project Director tenures to enable performance comparison:") pd1_inj = pd1[pd1["Event Type"] == "Injury/Illness Sustained"] pd2_inj = pd2[pd2["Event Type"] == "Injury/Illness Sustained"] pd1_mv = pd1[pd1["Event Type"] == "Motor Vehicle"] pd2_mv = pd2[pd2["Event Type"] == "Motor Vehicle"] pd1_ic = _inj_class(pd1) pd2_ic = _inj_class(pd2) pd1_cc = len(pd1[pd1["Event Type"] == "Close Call"]) pd2_cc = len(pd2[pd2["Event Type"] == "Close Call"]) pd1_mod = len(pd1[pd1["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])]) pd2_mod = len(pd2[pd2["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])]) _add_styled_table(doc, ["", pd1_name, pd2_name], [ ["Period", f"{pd1_start} \u2013 {pd1_end}", f"{pd2_start} \u2013 {pd2_end}"], ["Duration", f"{pd1_months:.0f} months", f"{pd2_months:.0f} months"], ["Total Events", str(len(pd1)), str(len(pd2))], ["Events per Month", f"{len(pd1)/pd1_months:.1f}", f"{len(pd2)/pd2_months:.1f}"], ["Injuries", f"{len(pd1_inj)} ({len(pd1_inj)/max(len(pd1),1)*100:.1f}%)", f"{len(pd2_inj)} ({len(pd2_inj)/max(len(pd2),1)*100:.1f}%)"], ["Motor Vehicle Events", f"{len(pd1_mv)} ({len(pd1_mv)/max(len(pd1),1)*100:.1f}%)", f"{len(pd2_mv)} ({len(pd2_mv)/max(len(pd2),1)*100:.1f}%)"], ["Lost Time Injuries", str(pd1_ic.get("Lost Time Injury", 0)), str(pd2_ic.get("Lost Time Injury", 0))], ["First Aid Treatments", str(pd1_ic.get("First Aid Treatment", 0)), str(pd2_ic.get("First Aid Treatment", 0))], ["Close Calls", f"{pd1_cc} ({pd1_cc/max(len(pd1),1)*100:.1f}%)", f"{pd2_cc} ({pd2_cc/max(len(pd2),1)*100:.1f}%)"], ["Moderate+ Consequence", f"{pd1_mod} ({pd1_mod/max(len(pd1),1)*100:.1f}%)", f"{pd2_mod} ({pd2_mod/max(len(pd2),1)*100:.1f}%)"], ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}", f"{pd2['Days to Investigate'].dropna().median():.0f}"], ["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}", f"{pd2['Days to Close'].dropna().median():.0f}"], ], [2.0, 2.2, 2.3] ) doc.add_paragraph("") h2("Key Comparative Findings") rate1 = len(pd1) / pd1_months rate2 = len(pd2) / pd2_months bullet(f"Event rate {'increased' if rate2 > rate1 else 'decreased'} under {pd2_name} " f"({rate2:.1f}/month vs {rate1:.1f}/month), with Moderate+ consequences at " f"{pd2_mod/max(len(pd2),1)*100:.1f}% vs {pd1_mod/max(len(pd1),1)*100:.1f}%.") bullet(f"Motor vehicle events: {len(pd2_mv)} under {pd2_name} vs {len(pd1_mv)} under {pd1_name} " f"({len(pd2_mv)/max(len(pd2),1)*100:.1f}% vs {len(pd1_mv)/max(len(pd1),1)*100:.1f}%).") bullet(f"Close call reporting: {pd2_cc/max(len(pd2),1)*100:.1f}% under {pd2_name} vs " f"{pd1_cc/max(len(pd1),1)*100:.1f}% under {pd1_name}.") lti1 = pd1_ic.get("Lost Time Injury", 0) lti2 = pd2_ic.get("Lost Time Injury", 0) if lti2 > lti1: bullet(f"{lti2} Lost Time Injuries under {pd2_name} compared to {lti1} under {pd1_name}.") doc.add_page_break() # ═══════════════════════════════════════════ # 2. MONTHLY TRENDS # ═══════════════════════════════════════════ h1("2. Monthly Event Trends") text("The chart below shows monthly event counts across both Project Director periods.") add_chart("monthly_by_pd", 5.8) doc.add_page_break() # ═══════════════════════════════════════════ # 3. EVENT TYPE COMPARISON # ═══════════════════════════════════════════ h1("3. Event Type Comparison") add_chart("event_type_by_pd", 5.5) evt_types = df["Event Type"].value_counts().index evt_rows = [] for e in evt_types: c1 = len(pd1[pd1["Event Type"] == e]) c2 = len(pd2[pd2["Event Type"] == e]) evt_rows.append([e, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%", str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"]) _add_styled_table(doc, ["Event Type", pd1_name, "%", pd2_name, "%"], evt_rows, [2.0, 1.1, 0.8, 1.0, 0.8]) doc.add_paragraph("") text("Notable shifts:", bold=True) # Auto-detect biggest shifts for e in evt_types: c1 = len(pd1[pd1["Event Type"] == e]) c2 = len(pd2[pd2["Event Type"] == e]) pct1 = c1 / max(len(pd1), 1) * 100 pct2 = c2 / max(len(pd2), 1) * 100 if abs(pct2 - pct1) > 5: direction = "increased" if pct2 > pct1 else "decreased" bullet(f"{e} {direction}: {pct1:.1f}% \u2192 {pct2:.1f}% ({c1} \u2192 {c2} events).") doc.add_page_break() # ═══════════════════════════════════════════ # 4. INJURY ANALYSIS # ═══════════════════════════════════════════ h1("4. Injury Analysis") h2("4.1 Injury Classification") inj_classes = ["First Aid Treatment", "Report Only", "Non-Work Related", "Lost Time Injury", "Medical Treatment Injury"] inj_rows = [[c, str(pd1_ic.get(c, 0)), str(pd2_ic.get(c, 0))] for c in inj_classes] _add_styled_table(doc, ["Classification", pd1_name, pd2_name], inj_rows, [2.5, 1.8, 1.8]) h2("4.2 Body Parts Injured") add_chart("body_parts", 5.0) # Body part comparison bp1 = get_body_parts(pd1["Bodily Location"]).value_counts().head(6) bp2 = get_body_parts(pd2["Bodily Location"]).value_counts().head(6) all_bp = list(dict.fromkeys(list(bp1.index) + list(bp2.index)))[:8] bp_rows = [[bp, str(bp1.get(bp, 0)), str(bp2.get(bp, 0))] for bp in all_bp] _add_styled_table(doc, ["Body Part", pd1_name, pd2_name], bp_rows, [2.5, 1.8, 1.8]) doc.add_page_break() # ═══════════════════════════════════════════ # 5. CONSEQUENCE ANALYSIS # ═══════════════════════════════════════════ h1("5. Consequence Analysis") add_chart("consequence_by_pd", 5.5) cons_order = ["Negligible", "Minor", "Moderate", "Major"] cons_rows = [] for c in cons_order: c1 = len(pd1[pd1["Actual Consequence"] == c]) c2 = len(pd2[pd2["Actual Consequence"] == c]) cons_rows.append([c, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%", str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"]) _add_styled_table(doc, ["Consequence", pd1_name, "%", pd2_name, "%"], cons_rows, [1.5, 1.0, 0.8, 1.0, 0.8]) doc.add_page_break() # ═══════════════════════════════════════════ # 6. CRP & ROOT CAUSE # ═══════════════════════════════════════════ h1("6. Critical Risk Protocols & Root Causes") h2("6.1 CRP Comparison") add_chart("crp_by_pd", 5.5) h2("6.2 Root Cause Comparison") add_chart("rootcause_by_pd", 5.5) rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"] rc_rows = [] for r in rc_cats: c1 = len(pd1[pd1["Root Cause Category"] == r]) c2 = len(pd2[pd2["Root Cause Category"] == r]) t1 = pd1["Root Cause Category"].notna().sum() t2 = pd2["Root Cause Category"].notna().sum() rc_rows.append([r, str(c1), f"{c1/max(t1,1)*100:.1f}%", str(c2), f"{c2/max(t2,1)*100:.1f}%"]) _add_styled_table(doc, ["Root Cause", pd1_name, "%", pd2_name, "%"], rc_rows, [2.0, 1.1, 0.8, 1.0, 0.8]) doc.add_page_break() # ═══════════════════════════════════════════ # 7. TIMING PATTERNS # ═══════════════════════════════════════════ h1("7. Timing Patterns") add_chart("dow_by_pd", 5.5) doc.add_page_break() # ═══════════════════════════════════════════ # 8. INVESTIGATION PERFORMANCE # ═══════════════════════════════════════════ h1("8. Investigation Performance") inv_rows = [ ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}", f"{pd2['Days to Investigate'].dropna().median():.0f}"], ["Mean Days to Investigate", f"{pd1['Days to Investigate'].dropna().mean():.1f}", f"{pd2['Days to Investigate'].dropna().mean():.1f}"], ["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}", f"{pd2['Days to Close'].dropna().median():.0f}"], ["Mean Days to Close", f"{pd1['Days to Close'].dropna().mean():.1f}", f"{pd2['Days to Close'].dropna().mean():.1f}"], ["Events Closed", f"{(pd1['Status']=='Closed').sum()} ({(pd1['Status']=='Closed').sum()/max(len(pd1),1)*100:.0f}%)", f"{(pd2['Status']=='Closed').sum()} ({(pd2['Status']=='Closed').sum()/max(len(pd2),1)*100:.0f}%)"], ["Events Open", str((pd1["Status"] == "Open").sum()), str((pd2["Status"] == "Open").sum())], ] _add_styled_table(doc, ["Metric", pd1_name, pd2_name], inv_rows, [2.5, 1.8, 1.8]) doc.add_page_break() # ═══════════════════════════════════════════ # 9. RECOMMENDATIONS # ═══════════════════════════════════════════ h1("9. Key Findings & Recommendations") h2(f"9.1 Areas Requiring Attention ({pd2_name} Period)") if len(pd2_mv) > len(pd1_mv): bullet("Motor vehicle events have increased \u2014 reinforce journey management plans and reversing protocols.") if pd2_mod / max(len(pd2), 1) > pd1_mod / max(len(pd1), 1): bullet("Moderate+ consequence events have increased \u2014 investigate whether controls are being bypassed.") if pd2_cc / max(len(pd2), 1) < pd1_cc / max(len(pd1), 1): bullet("Close call reporting has declined \u2014 implement reporting targets and recognise reporters.") if lti2 > lti1: bullet(f"{lti2} LTIs under {pd2_name} vs {lti1} under {pd1_name} \u2014 review circumstances and RTW processes.") h2("9.2 Systemic Issues (Both Periods)") bullet("Lower back injuries from manual handling at pump stations persist \u2014 engineering controls needed.") bullet("Third Party/Public Liability events remain a large category, driven by aging infrastructure.") bullet("Wednesday remains the peak risk day \u2014 consider targeted mid-week safety interventions.") h2("9.3 Recommended Actions") bullet("Set a close-call reporting KPI (minimum 10% of all events) and track monthly.") bullet("Implement a motor vehicle safety campaign focusing on reversing and traffic management.") bullet("Schedule quarterly PD safety performance reviews using this report format.") # ── Save ── output_path = os.path.join(output_dir, "SHEQ_PD_Comparison.docx") doc.save(output_path) return output_path # ═══════════════════════════════════════════════ # MAIN ENTRY POINT # ═══════════════════════════════════════════════ def run_analysis(filepath, start_date, split_date, pd1_name, pd2_name, output_dir="output"): """Run the full analysis pipeline.""" os.makedirs(output_dir, exist_ok=True) print(f"Loading data from {filepath}...") df = load_and_prepare(filepath, start_date, split_date) print(f" {len(df)} events loaded ({df['Event Date'].min().date()} to {df['Event Date'].max().date()})") print(f" {pd1_name}: {(df['PD']=='pd1').sum()} events") print(f" {pd2_name}: {(df['PD']=='pd2').sum()} events") print("Generating charts...") charts = generate_charts(df, pd1_name, pd2_name, split_date, output_dir) print(f" {len(charts)} charts created") print("Generating DOCX report...") docx_path = generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir) print(f" Report saved to {docx_path}") return docx_path if __name__ == "__main__": run_analysis( filepath="All_Events__5_.xlsx", start_date="2024-01-01", split_date="2025-04-01", pd1_name="Matthew Arthur", pd2_name="Manga", output_dir="output" )