diff --git a/0000.xlsx b/0000.xlsx new file mode 100644 index 0000000..54e7018 Binary files /dev/null and b/0000.xlsx differ diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..1833aee --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,169 @@ +# DESIGN.md — Ventia Brand Guidelines for SHEQ Tool + +## Typography + +**Primary font family:** Source Sans Pro + +Source Sans Pro is our primary design font family used across all brand applications and designed collateral. It offers a wide range of weights in both roman and italic. + +| Weight | Use Case | +|---------------|---------------------------------------| +| Bold (700) | Headings, stat callouts, table headers | +| SemiBold (600)| Sub-headings, emphasis labels | +| Regular (400) | Body text, table data, bullet points | +| Light (300) | Captions, footnotes, muted annotations | + +**Fallback stack:** `"Source Sans Pro", "Source Sans 3", -apple-system, "Segoe UI", sans-serif` + +### Sizing + +| Element | DOCX (pt) | Web (rem) | +|--------------------|-----------|-----------| +| Report title | 28 | 2.0 | +| Section heading | 16 | 1.5 | +| Sub-heading | 13 | 1.15 | +| Body text | 11 | 1.0 | +| Table cell | 9–10 | 0.85 | +| Caption / footnote | 8 | 0.75 | + +--- + +## Colour Palette + +### Primary Colours + +The signature colours are **Deep Blue** and **Sky Blue**. These should be the lead colours in most instances. + +| Name | HEX | RGB | CMYK | PMS | Usage | +|-----------|-----------|-----------------|--------------------|----------|---------------------------------------------| +| Deep Blue | `#0b3254` | 11, 50, 84 | 100, 80, 39, 37 | PMS 540C | Headings, header bars, table headers, nav | +| Sky Blue | `#13b5ea` | 19, 181, 234 | 69, 7, 0, 0 | PMS 298C | Sub-headings, accents, links, chart highlight | + +### Secondary Colours + +The secondary palette allows for flexibility, versatility and personality in the brand. 
+ +| Name | HEX | RGB | CMYK | PMS | Usage | +|-------------|-----------|-----------------|--------------------|-----------|---------------------------------------------| +| Dark Green | `#006e47` | 0, 110, 71 | 100, 30, 88, 21 | PMS 7727C | Positive indicators, body part charts | +| Mid Green | `#009946` | 0, 153, 70 | 96, 10, 100, 1 | PMS 347C | Secondary positive, trend improvements | +| Light Green | `#7bc143` | 123, 193, 67 | 57, 0, 100, 0 | PMS 368C | Tertiary positive, low-severity shading | +| Purple | `#96358d` | 150, 53, 141 | 48, 94, 5, 0 | PMS 513C | Categorical distinction, chart series accent | + +### Functional Colours + +These are derived from the brand palette and used for semantic meaning in data visualisation and reporting. + +| Role | Colour | HEX | Notes | +|---------------|-------------|-----------|--------------------------------------| +| Warning | Amber | `#d97706` | Moderate consequence, caution states | +| Critical | Red | `#dc2626` | Major/Substantial, LTI, alerts | +| Muted text | Slate grey | `#64748b` | Captions, secondary labels | +| Card background | Off-white | `#f0f5fa` | Alternating table rows, card bg | +| Page background | Near-white | `#f8fafc` | Web app body background | +| Borders | Light grey | `#e2e8f0` | Table borders, card edges | + +--- + +## Colour Application + +### DOCX Reports + +- **Page header:** Deep Blue `#0b3254` underline rule +- **Heading 1:** Deep Blue `#0b3254`, Source Sans Pro Bold 16pt +- **Heading 2:** Sky Blue `#13b5ea`, Source Sans Pro SemiBold 13pt +- **Table header row:** Deep Blue `#0b3254` fill, white text +- **Alternating rows:** Off-white `#f0f5fa` / white `#ffffff` +- **Footer text:** Slate grey `#64748b` + +### Web App + +- **Navigation / header bar:** Deep Blue `#0b3254` +- **Primary buttons:** Deep Blue `#0b3254` +- **Secondary buttons:** Sky Blue `#13b5ea` +- **Active accent / links:** Sky Blue `#13b5ea` +- **Sidebar background:** White `#ffffff` with light grey border +- **Body 
background:** Near-white `#f8fafc` + +### Charts & Data Visualisation + +Use this sequence for multi-series charts: + +``` +Series 1: Deep Blue #0b3254 +Series 2: Sky Blue #13b5ea +Series 3: Dark Green #006e47 +Series 4: Mid Green #009946 +Series 5: Light Green #7bc143 +Series 6: Purple #96358d +Series 7: Amber #d97706 +Series 8: Red #dc2626 +``` + +For PD comparison charts: +- **PD 1 (prior):** Deep Blue `#0b3254` +- **PD 2 (current):** Sky Blue `#13b5ea` + +For consequence severity: +- **Negligible:** Dark Green `#006e47` +- **Minor:** Amber `#d97706` +- **Moderate:** Red `#dc2626` +- **Major / Substantial:** Purple `#96358d` + +--- + +## CSS Variables (Web App) + +```css +:root { + /* Primary */ + --deep-blue: #0b3254; + --sky-blue: #13b5ea; + + /* Secondary */ + --dark-green: #006e47; + --mid-green: #009946; + --light-green: #7bc143; + --purple: #96358d; + + /* Functional */ + --amber: #d97706; + --red: #dc2626; + --muted: #64748b; + --card-bg: #f0f5fa; + --page-bg: #f8fafc; + --border: #e2e8f0; + --text: #1e293b; + --white: #ffffff; + + /* Typography */ + --font-primary: "Source Sans Pro", "Source Sans 3", -apple-system, "Segoe UI", sans-serif; + --font-heading: var(--font-primary); +} +``` + +--- + +## Python Constants (analysis.py) + +```python +# Brand colours +DEEP_BLUE = "#0b3254" +SKY_BLUE = "#13b5ea" +DARK_GREEN = "#006e47" +MID_GREEN = "#009946" +LIGHT_GREEN = "#7bc143" +PURPLE = "#96358d" +AMBER = "#d97706" +RED = "#dc2626" + +CHART_PALETTE = [DEEP_BLUE, SKY_BLUE, DARK_GREEN, MID_GREEN, LIGHT_GREEN, PURPLE, AMBER, RED] +``` + +--- + +## Notes + +- Source Sans Pro is available from [Google Fonts](https://fonts.google.com/specimen/Source+Sans+Pro) and should be installed locally for DOCX rendering. In the web app, import via Google Fonts CDN. +- When Source Sans Pro is unavailable in a DOCX context (e.g. recipient doesn't have it installed), the fallback is Calibri then Arial. 
+- Always maintain sufficient contrast — do not place Sky Blue text on white backgrounds at small sizes. Use Deep Blue for body text and Sky Blue for headings/accents only. diff --git a/Events.xlsx b/Events.xlsx new file mode 100644 index 0000000..6041f4a Binary files /dev/null and b/Events.xlsx differ diff --git a/LLC_Data.xlsx b/LLC_Data.xlsx new file mode 100644 index 0000000..8c1f587 Binary files /dev/null and b/LLC_Data.xlsx differ diff --git a/Safety_Energy.xlsx b/Safety_Energy.xlsx new file mode 100644 index 0000000..bd5a393 Binary files /dev/null and b/Safety_Energy.xlsx differ diff --git a/__pycache__/analysis.cpython-314.pyc b/__pycache__/analysis.cpython-314.pyc new file mode 100644 index 0000000..b6cb7df Binary files /dev/null and b/__pycache__/analysis.cpython-314.pyc differ diff --git a/__pycache__/analysis_engine.cpython-314.pyc b/__pycache__/analysis_engine.cpython-314.pyc new file mode 100644 index 0000000..3d49a10 Binary files /dev/null and b/__pycache__/analysis_engine.cpython-314.pyc differ diff --git a/__pycache__/app.cpython-314.pyc b/__pycache__/app.cpython-314.pyc new file mode 100644 index 0000000..da3580e Binary files /dev/null and b/__pycache__/app.cpython-314.pyc differ diff --git a/__pycache__/config.cpython-314.pyc b/__pycache__/config.cpython-314.pyc new file mode 100644 index 0000000..5ce8e28 Binary files /dev/null and b/__pycache__/config.cpython-314.pyc differ diff --git a/__pycache__/data_loader.cpython-314.pyc b/__pycache__/data_loader.cpython-314.pyc new file mode 100644 index 0000000..644779c Binary files /dev/null and b/__pycache__/data_loader.cpython-314.pyc differ diff --git a/__pycache__/ppt_builder.cpython-314.pyc b/__pycache__/ppt_builder.cpython-314.pyc new file mode 100644 index 0000000..c1013b2 Binary files /dev/null and b/__pycache__/ppt_builder.cpython-314.pyc differ diff --git a/__pycache__/report_builder.cpython-314.pyc b/__pycache__/report_builder.cpython-314.pyc new file mode 100644 index 0000000..aea1273 Binary 
files /dev/null and b/__pycache__/report_builder.cpython-314.pyc differ diff --git a/analysis.py b/analysis.py new file mode 100644 index 0000000..6f49d6e --- /dev/null +++ b/analysis.py @@ -0,0 +1,694 @@ +""" +SHEQ Incident Analysis Engine +Generates charts and a DOCX report comparing two Project Director periods. + +Usage: + from analysis import run_analysis + run_analysis("All_Events__5_.xlsx", "2024-01-01", "2025-04-01", + "Matthew Arthur", "Manga", output_dir="output") +""" + +import os +import pandas as pd +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np +from docx import Document +from docx.shared import Inches, Pt, Cm, RGBColor, Emu +from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.enum.table import WD_TABLE_ALIGNMENT +from docx.oxml.ns import qn, nsdecls +from docx.oxml import parse_xml +from io import BytesIO + + +# ── Brand Colours (see DESIGN.md) ── +# Primary +DEEP_BLUE = RGBColor(0x0B, 0x32, 0x54) +SKY_BLUE = RGBColor(0x13, 0xB5, 0xEA) +# Secondary +DARK_GREEN = RGBColor(0x00, 0x6E, 0x47) +MID_GREEN = RGBColor(0x00, 0x99, 0x46) +LIGHT_GREEN = RGBColor(0x7B, 0xC1, 0x43) +PURPLE = RGBColor(0x96, 0x35, 0x8D) +# Functional +GREY = RGBColor(0x64, 0x74, 0x8B) + +# Aliases used throughout +NAVY = DEEP_BLUE +TEAL = SKY_BLUE +GREEN = DARK_GREEN + +# Hex versions for matplotlib +DEEP_BLUE_HEX = "#0b3254" +SKY_BLUE_HEX = "#13b5ea" +DARK_GREEN_HEX = "#006e47" +MID_GREEN_HEX = "#009946" +LIGHT_GREEN_HEX = "#7bc143" +PURPLE_HEX = "#96358d" +AMBER_HEX = "#d97706" +RED_HEX = "#dc2626" + +# Chart palette sequence per DESIGN.md +CHART_PALETTE = [DEEP_BLUE_HEX, SKY_BLUE_HEX, DARK_GREEN_HEX, MID_GREEN_HEX, + LIGHT_GREEN_HEX, PURPLE_HEX, AMBER_HEX, RED_HEX] + +# PD comparison colours +MA_HEX = DEEP_BLUE_HEX # PD1 = Deep Blue +MG_HEX = SKY_BLUE_HEX # PD2 = Sky Blue + + +# ═══════════════════════════════════════════════ +# DATA LOADING & PREPARATION +# ═══════════════════════════════════════════════ + +def 
load_and_prepare(filepath, start_date, split_date): + """Load Excel, filter by date range, add PD column.""" + df = pd.read_excel(filepath) + df["Event Date"] = pd.to_datetime(df["Event Date"]) + df = df[df["Event Date"] >= pd.Timestamp(start_date)].copy() + df["Year"] = df["Event Date"].dt.year + df["Month"] = df["Event Date"].dt.month + df["MonthName"] = df["Event Date"].dt.strftime("%b") + df["DOW"] = df["Event Date"].dt.day_name() + df["YearMonth"] = df["Event Date"].dt.to_period("M") + df["PD"] = df["Event Date"].apply( + lambda x: "pd1" if x < pd.Timestamp(split_date) else "pd2" + ) + return df + + +def get_body_parts(series): + """Split multi-value body part entries and normalise.""" + parts = [] + for val in series.dropna(): + for part in str(val).split(","): + part = part.strip() + if part and "unspecified" not in part.lower(): + parts.append(part) + return pd.Series(parts) + + +# ═══════════════════════════════════════════════ +# CHART GENERATION +# ═══════════════════════════════════════════════ + +def _save(fig, path): + fig.tight_layout() + fig.savefig(path, dpi=200, bbox_inches="tight", facecolor="white") + plt.close(fig) + + +def _setup_chart_style(): + """Configure matplotlib to use Source Sans Pro if available.""" + import matplotlib.font_manager as fm + available = [f.name for f in fm.fontManager.ttflist] + if "Source Sans Pro" in available: + plt.rcParams["font.family"] = "Source Sans Pro" + elif "Source Sans 3" in available: + plt.rcParams["font.family"] = "Source Sans 3" + else: + plt.rcParams["font.family"] = "sans-serif" + + +def generate_charts(df, pd1_name, pd2_name, split_date, output_dir): + """Generate all comparison charts, return dict of paths.""" + _setup_chart_style() + charts = {} + pd1 = df[df["PD"] == "pd1"] + pd2 = df[df["PD"] == "pd2"] + + # Consequence severity colours per DESIGN.md + CONS_COLORS = [DARK_GREEN_HEX, AMBER_HEX, RED_HEX, PURPLE_HEX] + + # 1. 
Monthly trend by PD + fig, ax = plt.subplots(figsize=(10, 4)) + start_period = df["Event Date"].min().to_period("M") + end_period = df["Event Date"].max().to_period("M") + months_all = pd.period_range(start_period, end_period, freq="M") + monthly = df.groupby(["YearMonth", "PD"]).size().unstack(fill_value=0).reindex(months_all, fill_value=0) + x = range(len(months_all)) + labels = [m.strftime("%b %y") for m in months_all] + ma_vals = monthly.get("pd1", pd.Series(0, index=months_all)).values + mg_vals = monthly.get("pd2", pd.Series(0, index=months_all)).values + ax.bar(x, ma_vals, color=MA_HEX, label=pd1_name, width=0.7, alpha=0.9) + ax.bar(x, mg_vals, bottom=ma_vals, color=MG_HEX, label=pd2_name, width=0.7, alpha=0.9) + split_m = pd.Timestamp(split_date).to_period("M") + if split_m in months_all: + trans_idx = list(months_all).index(split_m) + ax.axvline(x=trans_idx - 0.5, color=RED_HEX, linestyle="--", linewidth=1.5, alpha=0.7) + ax.text(trans_idx - 0.3, max(max(ma_vals + mg_vals), 1) * 0.95, "PD Transition", + fontsize=9, color=RED_HEX, ha="left") + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8) + ax.set_title("Monthly Events by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) + ax.set_ylabel("Events") + ax.legend(loc="upper right") + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + p = os.path.join(output_dir, "monthly_by_pd.png") + _save(fig, p) + charts["monthly_by_pd"] = p + + # 2. 
Event type comparison + evt_types = df["Event Type"].value_counts().index[:8] + ma_evt = pd1["Event Type"].value_counts().reindex(evt_types, fill_value=0) + mg_evt = pd2["Event Type"].value_counts().reindex(evt_types, fill_value=0) + fig, ax = plt.subplots(figsize=(9, 5)) + y = np.arange(len(evt_types)) + h = 0.35 + ax.barh(y - h / 2, ma_evt.values, h, label=pd1_name, color=MA_HEX) + ax.barh(y + h / 2, mg_evt.values, h, label=pd2_name, color=MG_HEX) + ax.set_yticks(y) + ax.set_yticklabels(evt_types, fontsize=10) + ax.invert_yaxis() + ax.set_title("Event Types by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) + ax.legend() + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + for i, (v1, v2) in enumerate(zip(ma_evt.values, mg_evt.values)): + ax.text(v1 + 0.2, i - h / 2, str(v1), va="center", fontsize=9, color=MA_HEX) + ax.text(v2 + 0.2, i + h / 2, str(v2), va="center", fontsize=9, color=MG_HEX) + p = os.path.join(output_dir, "event_type_by_pd.png") + _save(fig, p) + charts["event_type_by_pd"] = p + + # 3. Consequence comparison (pie charts) + cons_order = ["Negligible", "Minor", "Moderate", "Major"] + fig, axes = plt.subplots(1, 2, figsize=(9, 3.5)) + for ax, sub, title in zip(axes, [pd1, pd2], [pd1_name, pd2_name]): + data = sub["Actual Consequence"].value_counts().reindex(cons_order, fill_value=0) + ax.pie(data.values, labels=cons_order, autopct="%1.0f%%", colors=CONS_COLORS, startangle=140, + textprops={"fontsize": 9}) + ax.set_title(title, fontsize=13, fontweight="bold", color=MA_HEX) + p = os.path.join(output_dir, "consequence_by_pd.png") + _save(fig, p) + charts["consequence_by_pd"] = p + + # 4. 
Day of week + dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] + fig, ax = plt.subplots(figsize=(9, 4)) + x_arr = np.arange(len(dow_order)) + w = 0.35 + ma_d = pd1["DOW"].value_counts().reindex(dow_order, fill_value=0) + mg_d = pd2["DOW"].value_counts().reindex(dow_order, fill_value=0) + b1 = ax.bar(x_arr - w / 2, ma_d.values, w, label=pd1_name, color=MA_HEX) + b2 = ax.bar(x_arr + w / 2, mg_d.values, w, label=pd2_name, color=MG_HEX) + ax.set_xticks(x_arr) + ax.set_xticklabels([d[:3] for d in dow_order]) + ax.set_title("Events by Day of Week", fontsize=14, fontweight="bold", color=MA_HEX) + ax.legend() + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + for b in b1: + if b.get_height() > 0: + ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())), + ha="center", fontsize=9) + for b in b2: + if b.get_height() > 0: + ax.text(b.get_x() + b.get_width() / 2, b.get_height() + 0.3, str(int(b.get_height())), + ha="center", fontsize=9) + p = os.path.join(output_dir, "dow_by_pd.png") + _save(fig, p) + charts["dow_by_pd"] = p + + # 5. 
Root cause + rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"] + fig, ax = plt.subplots(figsize=(9, 4)) + y = np.arange(len(rc_cats)) + h = 0.35 + ma_rc = pd1["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0) + mg_rc = pd2["Root Cause Category"].value_counts().reindex(rc_cats, fill_value=0) + ax.barh(y - h / 2, ma_rc.values, h, label=pd1_name, color=MA_HEX) + ax.barh(y + h / 2, mg_rc.values, h, label=pd2_name, color=MG_HEX) + ax.set_yticks(y) + ax.set_yticklabels(rc_cats, fontsize=10) + ax.invert_yaxis() + ax.set_title("Root Cause Categories by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) + ax.legend() + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + p = os.path.join(output_dir, "rootcause_by_pd.png") + _save(fig, p) + charts["rootcause_by_pd"] = p + + # 6. CRP comparison + crp_all = df["CRPInvolved"].value_counts() + crp_active = crp_all[~crp_all.index.isin(["None Identified", "Under Investigation"])].head(8) + crp_cats = crp_active.index + fig, ax = plt.subplots(figsize=(9, 4.5)) + y = np.arange(len(crp_cats)) + ma_c = pd1["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0) + mg_c = pd2["CRPInvolved"].value_counts().reindex(crp_cats, fill_value=0) + ax.barh(y - h / 2, ma_c.values, h, label=pd1_name, color=MA_HEX) + ax.barh(y + h / 2, mg_c.values, h, label=pd2_name, color=MG_HEX) + ax.set_yticks(y) + ax.set_yticklabels(crp_cats, fontsize=9) + ax.invert_yaxis() + ax.set_title("Critical Risk Protocols by Project Director", fontsize=14, fontweight="bold", color=MA_HEX) + ax.legend() + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + p = os.path.join(output_dir, "crp_by_pd.png") + _save(fig, p) + charts["crp_by_pd"] = p + + # 7. 
Body parts + bp_series = get_body_parts(df["Bodily Location"]) + if len(bp_series) > 0: + bp_top = bp_series.value_counts().head(10) + fig, ax = plt.subplots(figsize=(8, 4)) + ax.barh(range(len(bp_top)), bp_top.values, color=DARK_GREEN_HEX) + ax.set_yticks(range(len(bp_top))) + ax.set_yticklabels(bp_top.index, fontsize=10) + ax.invert_yaxis() + for i, v in enumerate(bp_top.values): + ax.text(v + 0.1, i, str(v), va="center", fontsize=11, fontweight="bold") + ax.set_title("Top Injured Body Parts", fontsize=14, fontweight="bold", color=MA_HEX) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + p = os.path.join(output_dir, "body_parts.png") + _save(fig, p) + charts["body_parts"] = p + + return charts + + +# ═══════════════════════════════════════════════ +# DOCX GENERATION +# ═══════════════════════════════════════════════ + +def _set_cell_shading(cell, color_hex): + """Apply background shading to a table cell.""" + shading = parse_xml(f'') + cell._tc.get_or_add_tcPr().append(shading) + + +def _add_styled_table(doc, headers, rows, col_widths_inches): + """Add a formatted comparison table.""" + table = doc.add_table(rows=1 + len(rows), cols=len(headers)) + table.alignment = WD_TABLE_ALIGNMENT.LEFT + table.style = "Table Grid" + + # Header row + for i, h in enumerate(headers): + cell = table.rows[0].cells[i] + cell.text = "" + p = cell.paragraphs[0] + run = p.add_run(h) + run.bold = True + run.font.size = Pt(9) + run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) + run.font.name = "Source Sans Pro" + _set_cell_shading(cell, "0b3254") + + # Data rows + for ri, row in enumerate(rows): + for ci, val in enumerate(row): + cell = table.rows[ri + 1].cells[ci] + cell.text = "" + p = cell.paragraphs[0] + run = p.add_run(str(val)) + run.font.size = Pt(9) + run.font.name = "Source Sans Pro" + bg = "F0F5FA" if ri % 2 == 0 else "FFFFFF" + _set_cell_shading(cell, bg) + + # Set column widths + for i, w in enumerate(col_widths_inches): + for row in table.rows: 
+ row.cells[i].width = Inches(w) + + return table + + +def generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir): + """Generate the full DOCX report.""" + doc = Document() + + # Set default font + style = doc.styles["Normal"] + style.font.name = "Source Sans Pro" + style.font.size = Pt(11) + + # Heading styles + for level, size, color in [(1, 16, NAVY), (2, 13, TEAL)]: + hs = doc.styles[f"Heading {level}"] + hs.font.name = "Source Sans Pro" + hs.font.size = Pt(size) + hs.font.color.rgb = color + hs.font.bold = True + + pd1 = df[df["PD"] == "pd1"] + pd2 = df[df["PD"] == "pd2"] + total = len(df) + pd1_months = max(1, (pd.Timestamp(split_date) - df["Event Date"].min()).days / 30.44) + pd2_months = max(1, (df["Event Date"].max() - pd.Timestamp(split_date)).days / 30.44 + 1) + + pd1_start = pd1["Event Date"].min().strftime("%b %Y") if len(pd1) > 0 else "N/A" + pd1_end = pd1["Event Date"].max().strftime("%b %Y") if len(pd1) > 0 else "N/A" + pd2_start = pd2["Event Date"].min().strftime("%b %Y") if len(pd2) > 0 else "N/A" + pd2_end = pd2["Event Date"].max().strftime("%b %Y") if len(pd2) > 0 else "N/A" + + # ── Title page ── + doc.add_paragraph("") + doc.add_paragraph("") + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run("SHEQ Incident Analysis") + run.font.size = Pt(28) + run.bold = True + run.font.name = "Source Sans Pro" + run.font.color.rgb = NAVY + + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run("Far North Waters Project") + run.font.size = Pt(16) + run.font.name = "Source Sans Pro" + run.font.color.rgb = TEAL + + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run(f"{pd1_start} \u2013 {pd2_end} (MTD)") + run.font.size = Pt(14) + run.font.name = "Source Sans Pro" + run.font.color.rgb = TEAL + + doc.add_paragraph("") + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run("Performance by Project Director") + 
run.font.size = Pt(13) + run.bold = True + run.font.name = "Source Sans Pro" + run.font.color.rgb = NAVY + + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run(f"{pd1_name} ") + run.bold = True + run.font.color.rgb = NAVY + run = p.add_run(f"({pd1_start} \u2013 {pd1_end}) | ") + run.font.color.rgb = GREY + run = p.add_run(f"{pd2_name} ") + run.bold = True + run.font.color.rgb = TEAL + run = p.add_run(f"({pd2_start} \u2013 {pd2_end})") + run.font.color.rgb = GREY + + doc.add_paragraph("") + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = p.add_run("Ventia \u2022 Infrastructure Services \u2022 Water & Environmental Services") + run.font.size = Pt(10) + run.font.color.rgb = GREY + + doc.add_page_break() + + # ── Helper functions ── + def h1(text): + doc.add_heading(text, level=1) + + def h2(text): + doc.add_heading(text, level=2) + + def text(t, bold=False): + p = doc.add_paragraph() + run = p.add_run(t) + run.bold = bold + return p + + def bullet(t): + p = doc.add_paragraph(t, style="List Bullet") + return p + + def add_chart(name, width=5.5): + if name in charts: + doc.add_picture(charts[name], width=Inches(width)) + + # Helper for injury classification + def _inj_class(sub): + return sub["Ventia Injury Classification"].value_counts() + + # ═══════════════════════════════════════════ + # 1. EXECUTIVE SUMMARY + # ═══════════════════════════════════════════ + h1("1. Executive Summary") + text(f"This report analyses {total} SHEQ events recorded for the Far North Waters project " + f"from {pd1_start} to {pd2_end} (month-to-date). 
The analysis is structured around " + f"two Project Director tenures to enable performance comparison:") + + pd1_inj = pd1[pd1["Event Type"] == "Injury/Illness Sustained"] + pd2_inj = pd2[pd2["Event Type"] == "Injury/Illness Sustained"] + pd1_mv = pd1[pd1["Event Type"] == "Motor Vehicle"] + pd2_mv = pd2[pd2["Event Type"] == "Motor Vehicle"] + pd1_ic = _inj_class(pd1) + pd2_ic = _inj_class(pd2) + pd1_cc = len(pd1[pd1["Event Type"] == "Close Call"]) + pd2_cc = len(pd2[pd2["Event Type"] == "Close Call"]) + pd1_mod = len(pd1[pd1["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])]) + pd2_mod = len(pd2[pd2["Actual Consequence"].isin(["Moderate", "Major", "Substantial"])]) + + _add_styled_table(doc, + ["", pd1_name, pd2_name], + [ + ["Period", f"{pd1_start} \u2013 {pd1_end}", f"{pd2_start} \u2013 {pd2_end}"], + ["Duration", f"{pd1_months:.0f} months", f"{pd2_months:.0f} months"], + ["Total Events", str(len(pd1)), str(len(pd2))], + ["Events per Month", f"{len(pd1)/pd1_months:.1f}", f"{len(pd2)/pd2_months:.1f}"], + ["Injuries", f"{len(pd1_inj)} ({len(pd1_inj)/max(len(pd1),1)*100:.1f}%)", + f"{len(pd2_inj)} ({len(pd2_inj)/max(len(pd2),1)*100:.1f}%)"], + ["Motor Vehicle Events", f"{len(pd1_mv)} ({len(pd1_mv)/max(len(pd1),1)*100:.1f}%)", + f"{len(pd2_mv)} ({len(pd2_mv)/max(len(pd2),1)*100:.1f}%)"], + ["Lost Time Injuries", str(pd1_ic.get("Lost Time Injury", 0)), str(pd2_ic.get("Lost Time Injury", 0))], + ["First Aid Treatments", str(pd1_ic.get("First Aid Treatment", 0)), str(pd2_ic.get("First Aid Treatment", 0))], + ["Close Calls", f"{pd1_cc} ({pd1_cc/max(len(pd1),1)*100:.1f}%)", + f"{pd2_cc} ({pd2_cc/max(len(pd2),1)*100:.1f}%)"], + ["Moderate+ Consequence", f"{pd1_mod} ({pd1_mod/max(len(pd1),1)*100:.1f}%)", + f"{pd2_mod} ({pd2_mod/max(len(pd2),1)*100:.1f}%)"], + ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}", + f"{pd2['Days to Investigate'].dropna().median():.0f}"], + ["Median Days to Close", f"{pd1['Days to 
Close'].dropna().median():.0f}", + f"{pd2['Days to Close'].dropna().median():.0f}"], + ], + [2.0, 2.2, 2.3] + ) + + doc.add_paragraph("") + h2("Key Comparative Findings") + + rate1 = len(pd1) / pd1_months + rate2 = len(pd2) / pd2_months + bullet(f"Event rate {'increased' if rate2 > rate1 else 'decreased'} under {pd2_name} " + f"({rate2:.1f}/month vs {rate1:.1f}/month), with Moderate+ consequences at " + f"{pd2_mod/max(len(pd2),1)*100:.1f}% vs {pd1_mod/max(len(pd1),1)*100:.1f}%.") + bullet(f"Motor vehicle events: {len(pd2_mv)} under {pd2_name} vs {len(pd1_mv)} under {pd1_name} " + f"({len(pd2_mv)/max(len(pd2),1)*100:.1f}% vs {len(pd1_mv)/max(len(pd1),1)*100:.1f}%).") + bullet(f"Close call reporting: {pd2_cc/max(len(pd2),1)*100:.1f}% under {pd2_name} vs " + f"{pd1_cc/max(len(pd1),1)*100:.1f}% under {pd1_name}.") + + lti1 = pd1_ic.get("Lost Time Injury", 0) + lti2 = pd2_ic.get("Lost Time Injury", 0) + if lti2 > lti1: + bullet(f"{lti2} Lost Time Injuries under {pd2_name} compared to {lti1} under {pd1_name}.") + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 2. MONTHLY TRENDS + # ═══════════════════════════════════════════ + h1("2. Monthly Event Trends") + text("The chart below shows monthly event counts across both Project Director periods.") + add_chart("monthly_by_pd", 5.8) + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 3. EVENT TYPE COMPARISON + # ═══════════════════════════════════════════ + h1("3. 
Event Type Comparison") + add_chart("event_type_by_pd", 5.5) + + evt_types = df["Event Type"].value_counts().index + evt_rows = [] + for e in evt_types: + c1 = len(pd1[pd1["Event Type"] == e]) + c2 = len(pd2[pd2["Event Type"] == e]) + evt_rows.append([e, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%", + str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"]) + _add_styled_table(doc, ["Event Type", pd1_name, "%", pd2_name, "%"], evt_rows, + [2.0, 1.1, 0.8, 1.0, 0.8]) + + doc.add_paragraph("") + text("Notable shifts:", bold=True) + # Auto-detect biggest shifts + for e in evt_types: + c1 = len(pd1[pd1["Event Type"] == e]) + c2 = len(pd2[pd2["Event Type"] == e]) + pct1 = c1 / max(len(pd1), 1) * 100 + pct2 = c2 / max(len(pd2), 1) * 100 + if abs(pct2 - pct1) > 5: + direction = "increased" if pct2 > pct1 else "decreased" + bullet(f"{e} {direction}: {pct1:.1f}% \u2192 {pct2:.1f}% ({c1} \u2192 {c2} events).") + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 4. INJURY ANALYSIS + # ═══════════════════════════════════════════ + h1("4. Injury Analysis") + h2("4.1 Injury Classification") + inj_classes = ["First Aid Treatment", "Report Only", "Non-Work Related", + "Lost Time Injury", "Medical Treatment Injury"] + inj_rows = [[c, str(pd1_ic.get(c, 0)), str(pd2_ic.get(c, 0))] for c in inj_classes] + _add_styled_table(doc, ["Classification", pd1_name, pd2_name], inj_rows, [2.5, 1.8, 1.8]) + + h2("4.2 Body Parts Injured") + add_chart("body_parts", 5.0) + + # Body part comparison + bp1 = get_body_parts(pd1["Bodily Location"]).value_counts().head(6) + bp2 = get_body_parts(pd2["Bodily Location"]).value_counts().head(6) + all_bp = list(dict.fromkeys(list(bp1.index) + list(bp2.index)))[:8] + bp_rows = [[bp, str(bp1.get(bp, 0)), str(bp2.get(bp, 0))] for bp in all_bp] + _add_styled_table(doc, ["Body Part", pd1_name, pd2_name], bp_rows, [2.5, 1.8, 1.8]) + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 5. 
CONSEQUENCE ANALYSIS + # ═══════════════════════════════════════════ + h1("5. Consequence Analysis") + add_chart("consequence_by_pd", 5.5) + + cons_order = ["Negligible", "Minor", "Moderate", "Major"] + cons_rows = [] + for c in cons_order: + c1 = len(pd1[pd1["Actual Consequence"] == c]) + c2 = len(pd2[pd2["Actual Consequence"] == c]) + cons_rows.append([c, str(c1), f"{c1/max(len(pd1),1)*100:.1f}%", + str(c2), f"{c2/max(len(pd2),1)*100:.1f}%"]) + _add_styled_table(doc, ["Consequence", pd1_name, "%", pd2_name, "%"], cons_rows, + [1.5, 1.0, 0.8, 1.0, 0.8]) + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 6. CRP & ROOT CAUSE + # ═══════════════════════════════════════════ + h1("6. Critical Risk Protocols & Root Causes") + h2("6.1 CRP Comparison") + add_chart("crp_by_pd", 5.5) + + h2("6.2 Root Cause Comparison") + add_chart("rootcause_by_pd", 5.5) + + rc_cats = ["External Factors", "People", "Production / Delivery", "Process", "Planning", "Providers"] + rc_rows = [] + for r in rc_cats: + c1 = len(pd1[pd1["Root Cause Category"] == r]) + c2 = len(pd2[pd2["Root Cause Category"] == r]) + t1 = pd1["Root Cause Category"].notna().sum() + t2 = pd2["Root Cause Category"].notna().sum() + rc_rows.append([r, str(c1), f"{c1/max(t1,1)*100:.1f}%", + str(c2), f"{c2/max(t2,1)*100:.1f}%"]) + _add_styled_table(doc, ["Root Cause", pd1_name, "%", pd2_name, "%"], rc_rows, + [2.0, 1.1, 0.8, 1.0, 0.8]) + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 7. TIMING PATTERNS + # ═══════════════════════════════════════════ + h1("7. Timing Patterns") + add_chart("dow_by_pd", 5.5) + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 8. INVESTIGATION PERFORMANCE + # ═══════════════════════════════════════════ + h1("8. 
Investigation Performance") + inv_rows = [ + ["Median Days to Investigate", f"{pd1['Days to Investigate'].dropna().median():.0f}", + f"{pd2['Days to Investigate'].dropna().median():.0f}"], + ["Mean Days to Investigate", f"{pd1['Days to Investigate'].dropna().mean():.1f}", + f"{pd2['Days to Investigate'].dropna().mean():.1f}"], + ["Median Days to Close", f"{pd1['Days to Close'].dropna().median():.0f}", + f"{pd2['Days to Close'].dropna().median():.0f}"], + ["Mean Days to Close", f"{pd1['Days to Close'].dropna().mean():.1f}", + f"{pd2['Days to Close'].dropna().mean():.1f}"], + ["Events Closed", f"{(pd1['Status']=='Closed').sum()} ({(pd1['Status']=='Closed').sum()/max(len(pd1),1)*100:.0f}%)", + f"{(pd2['Status']=='Closed').sum()} ({(pd2['Status']=='Closed').sum()/max(len(pd2),1)*100:.0f}%)"], + ["Events Open", str((pd1["Status"] == "Open").sum()), str((pd2["Status"] == "Open").sum())], + ] + _add_styled_table(doc, ["Metric", pd1_name, pd2_name], inv_rows, [2.5, 1.8, 1.8]) + + doc.add_page_break() + + # ═══════════════════════════════════════════ + # 9. RECOMMENDATIONS + # ═══════════════════════════════════════════ + h1("9. 
    # ── Section 9.1: period-over-period deltas (rates are normalised by period size) ──
    h2(f"9.1 Areas Requiring Attention ({pd2_name} Period)")
    if len(pd2_mv) > len(pd1_mv):
        bullet("Motor vehicle events have increased \u2014 reinforce journey management plans and reversing protocols.")
    # max(len(.), 1) guards against division by zero when a period has no events
    if pd2_mod / max(len(pd2), 1) > pd1_mod / max(len(pd1), 1):
        bullet("Moderate+ consequence events have increased \u2014 investigate whether controls are being bypassed.")
    # Close-call rate FALLING is the negative signal here (less reporting, not fewer hazards)
    if pd2_cc / max(len(pd2), 1) < pd1_cc / max(len(pd1), 1):
        bullet("Close call reporting has declined \u2014 implement reporting targets and recognise reporters.")
    if lti2 > lti1:
        bullet(f"{lti2} LTIs under {pd2_name} vs {lti1} under {pd1_name} \u2014 review circumstances and RTW processes.")

    # ── Section 9.2: fixed narrative bullets (not data-driven) ──
    h2("9.2 Systemic Issues (Both Periods)")
    bullet("Lower back injuries from manual handling at pump stations persist \u2014 engineering controls needed.")
    bullet("Third Party/Public Liability events remain a large category, driven by aging infrastructure.")
    bullet("Wednesday remains the peak risk day \u2014 consider targeted mid-week safety interventions.")

    h2("9.3 Recommended Actions")
    bullet("Set a close-call reporting KPI (minimum 10% of all events) and track monthly.")
    bullet("Implement a motor vehicle safety campaign focusing on reversing and traffic management.")
    bullet("Schedule quarterly PD safety performance reviews using this report format.")

    # ── Save ──
    output_path = os.path.join(output_dir, "SHEQ_PD_Comparison.docx")
    doc.save(output_path)
    return output_path


# ═══════════════════════════════════════════════
# MAIN ENTRY POINT
# ═══════════════════════════════════════════════

def run_analysis(filepath, start_date, split_date, pd1_name, pd2_name, output_dir="output"):
    """Run the full analysis pipeline.

    Loads the events workbook, splits it into the two PD periods at
    ``split_date``, renders the charts, then writes the DOCX report.

    Parameters: filepath — path to the events .xlsx; start_date /
    split_date — ISO date strings; pd1_name / pd2_name — display names
    for the two periods; output_dir — created if missing.
    Returns the path of the generated DOCX report.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"Loading data from {filepath}...")
    df = load_and_prepare(filepath, start_date, split_date)
    # NOTE(review): assumes load_and_prepare yields 'Event Date' and a 'PD'
    # column holding the literals 'pd1'/'pd2' — confirm against data loader.
    print(f" {len(df)} events loaded ({df['Event Date'].min().date()} to {df['Event Date'].max().date()})")
    print(f" {pd1_name}: {(df['PD']=='pd1').sum()} events")
    print(f" {pd2_name}: {(df['PD']=='pd2').sum()} events")

    print("Generating charts...")
    charts = generate_charts(df, pd1_name, pd2_name, split_date, output_dir)
    print(f" {len(charts)} charts created")

    print("Generating DOCX report...")
    docx_path = generate_docx(df, pd1_name, pd2_name, split_date, charts, output_dir)
    print(f" Report saved to {docx_path}")

    return docx_path


if __name__ == "__main__":
    run_analysis(
        filepath="All_Events__5_.xlsx",
        start_date="2024-01-01",
        split_date="2025-04-01",
        pd1_name="Matthew Arthur",
        pd2_name="Manga",
        output_dir="output"
    )
+ +Public API +---------- +run_full_analysis(events, safety_energy, llc, start_date, split_date, + pd1_name, pd2_name, output_dir) -> AnalysisResults +""" + +from __future__ import annotations + +import logging +import os +import re +import warnings +from collections import Counter +from dataclasses import dataclass, field +from typing import Any, Optional + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import matplotlib.ticker as mticker +import numpy as np +import pandas as pd + +from config import ( + CHART_PALETTE, + DEEP_BLUE, SKY_BLUE, DARK_GREEN, MID_GREEN, LIGHT_GREEN, + PURPLE, AMBER, RED, MUTED, + CONSEQUENCE_ORDER, CONSEQUENCE_SERIOUS, + LEADING_ACTIVITY_TYPES, ACTIVITY_COLOURS, + AT_RISK_KEYWORDS, + CORR_MIN_MONTHS, LEADER_MIN_ACTIVITIES, + TWO_YEAR_WINDOW_MONTHS, QUALITY_SCORE_BANDS, +) + +log = logging.getLogger(__name__) +warnings.filterwarnings("ignore", category=UserWarning) + + +# ───────────────────────────────────────────────────────────────────────────── +# Result container +# ───────────────────────────────────────────────────────────────────────────── + +@dataclass +class AnalysisResults: + """All analysis outputs passed to report_builder.""" + params: dict[str, Any] = field(default_factory=dict) + data_quality: dict[str, Any] = field(default_factory=dict) + events_summary: dict[str, Any] = field(default_factory=dict) + leading_summary: dict[str, Any] = field(default_factory=dict) + trends: dict[str, Any] = field(default_factory=dict) + effectiveness: dict[str, Any] = field(default_factory=dict) + at_risk: dict[str, Any] = field(default_factory=dict) + se_events_rel: dict[str, Any] = field(default_factory=dict) + focus_areas: dict[str, Any] = field(default_factory=dict) + charts: dict[str, str] = field(default_factory=dict) # name → file path + recommendations: list[str] = field(default_factory=list) + caveats: list[str] = field(default_factory=list) + + +# 
───────────────────────────────────────────────────────────────────────────── +# Chart helpers +# ───────────────────────────────────────────────────────────────────────────── + +def _setup_style() -> None: + """Apply brand-aligned matplotlib defaults.""" + import matplotlib.font_manager as fm + available = {f.name for f in fm.fontManager.ttflist} + if "Source Sans Pro" in available: + plt.rcParams["font.family"] = "Source Sans Pro" + elif "Source Sans 3" in available: + plt.rcParams["font.family"] = "Source Sans 3" + else: + plt.rcParams["font.family"] = "sans-serif" + plt.rcParams.update({ + "axes.spines.top": False, + "axes.spines.right": False, + "axes.grid": True, + "grid.alpha": 0.3, + "grid.linestyle": "--", + }) + + +def _save(fig: plt.Figure, path: str) -> None: + fig.tight_layout() + fig.savefig(path, dpi=180, bbox_inches="tight", facecolor="white") + plt.close(fig) + + +def _month_labels(periods: pd.PeriodIndex) -> list[str]: + return [p.strftime("%b %y") for p in periods] + + +QUALITY_TEXT_COLUMNS = [ + "module_name", + "llc_topic", + "at_risk_obs", + "positive_obs", + "at_risk_crp", + "Immediate Actions Taken / Comments", + "Instruction", + "Top practices", + "Top improvement opportunities", + "Review & Action", + "Best practices shared with site leaders", + "Activity/Task", + "Custom", +] + +INPUT_DEPTH_BASE_FIELDS = [ + "module_name", + "module_prefix", + "leader", + "business_unit", + "project", + "location", + "shift", +] + +INPUT_DEPTH_OPTIONAL_FIELDS = [ + "participants", + "time_spent", + "at_risk_crp", + "llc_topic", + "at_risk_obs", + "positive_obs", + "find_fix", + "Immediate Actions Taken / Comments", + "Instruction", + "Top practices", + "Top improvement opportunities", + "Review & Action", + "Best practices shared with site leaders", + "Activity/Task", + "Custom", +] + +INPUT_DEPTH_NUMERIC_FIELDS = [ + "at_risk_aspects", + "total_questions", + "actions", + "atl_actions", +] + +SEMANTIC_EMPTY_STRINGS = { + "", "n/a", "na", "nan", "nil", 
"none", "null", "unknown", "not applicable", + "no", "no risk identified", "no at risk identified", "no at risk situations identified", +} + +ACTION_WORDS = { + "action", "address", "brief", "coach", "control", "correct", "escalate", + "fix", "follow", "improve", "implement", "isolate", "monitor", "plan", + "rectify", "reinforce", "repair", "replace", "review", "stop", "train", + "update", "verify", +} + +LEARNING_WORDS = { + "awareness", "coach", "coaching", "discuss", "discussed", "education", + "explained", "feedback", "learn", "learning", "lesson", "mentor", + "reinforce", "reinforced", "reminded", "shared", "understand", +} + +REACTIVE_WORDS = { + "breach", "defect", "failure", "incident", "issue", "non-compliance", + "not in place", "overdue", "unsafe", "failed", +} + +PREVENTIVE_WORDS = { + "before", "brief", "coaching", "planned", "pre-start", "prepare", + "proactive", "reinforce", "review", "verify", +} + +GENERIC_PATTERNS = [ + "all good", + "n/a", + "na", + "nil", + "none", + "no issues", + "no at risk situations identified", + "no at risk identified", + "nothing noted", + "routine check", +] + + +def _safe_pct(numerator: float, denominator: float) -> float: + if denominator in (0, 0.0) or pd.isna(denominator): + return 0.0 + return float(numerator) / float(denominator) * 100.0 + + +def _normalise_text(text: Any) -> str: + if pd.isna(text): + return "" + return re.sub(r"\s+", " ", str(text)).strip() + + +def _is_meaningful_text(value: Any) -> bool: + text = _normalise_text(value).lower() + if not text: + return False + if text in SEMANTIC_EMPTY_STRINGS: + return False + return True + + +def _tokenise(text: str) -> list[str]: + return re.findall(r"[a-zA-Z][a-zA-Z0-9/&'-]+", text.lower()) + + +def _theme_matches(text: str) -> set[str]: + tl = text.lower() + return { + theme for theme, keywords in AT_RISK_KEYWORDS.items() + if any(kw in tl for kw in keywords) + } + + +def _top_dict(series: pd.Series, limit: int = 10) -> dict[str, int]: + if series.empty: + 
def _pick_window_start(max_date: pd.Timestamp) -> pd.Timestamp:
    """First day of the month TWO_YEAR_WINDOW_MONTHS-1 months before *max_date*'s month."""
    return (max_date.to_period("M") - (TWO_YEAR_WINDOW_MONTHS - 1)).to_timestamp()


def _build_quality_frame(se_window: pd.DataFrame) -> pd.DataFrame:
    """Create practical quality proxies for each Safety Energy record.

    Adds underscore-prefixed working columns (text blob, token stats,
    vocabulary-signal counts, boolean signals), an ``input_depth_score`` /
    ``input_depth_band`` pair, and a 0-100 ``quality_score`` with derived
    flags and a ``quality_band`` label. Returns a copy; the input frame is
    not mutated. An empty input is returned unchanged (as a copy).
    """
    if se_window.empty:
        return se_window.copy()

    df = se_window.copy()
    text_cols = [c for c in QUALITY_TEXT_COLUMNS if c in df.columns]

    # Pool every free-text field into one normalised blob per record.
    if text_cols:
        df["_text_blob"] = (
            df[text_cols]
            .fillna("")
            .astype(str)
            .agg(" ".join, axis=1)
            .map(_normalise_text)
        )
    else:
        df["_text_blob"] = ""

    # Token-level statistics and vocabulary signals over the blob.
    df["_tokens"] = df["_text_blob"].map(_tokenise)
    df["_word_count"] = df["_tokens"].map(len)
    df["_unique_words"] = df["_tokens"].map(lambda toks: len(set(toks)))
    df["_contains_number"] = df["_text_blob"].str.contains(r"\d", regex=True, na=False)
    df["_theme_count"] = df["_text_blob"].map(lambda t: len(_theme_matches(t)))
    df["_action_words"] = df["_tokens"].map(lambda toks: len(set(toks) & ACTION_WORDS))
    df["_learning_words"] = df["_tokens"].map(lambda toks: len(set(toks) & LEARNING_WORDS))
    # Reactive/preventive use substring matching (multi-word phrases), not tokens.
    df["_reactive_words"] = df["_text_blob"].str.lower().map(
        lambda txt: sum(1 for word in REACTIVE_WORDS if word in txt)
    )
    df["_preventive_words"] = df["_text_blob"].str.lower().map(
        lambda txt: sum(1 for word in PREVENTIVE_WORDS if word in txt)
    )
    df["_generic_flag"] = (
        (df["_word_count"] <= 4)
        | df["_text_blob"].str.lower().map(lambda txt: any(p in txt for p in GENERIC_PATTERNS))
    )

    # Numeric companion fields, coerced so missing columns behave as zeros.
    total_q = pd.to_numeric(df.get("total_questions"), errors="coerce").fillna(0)
    at_risk = pd.to_numeric(df.get("at_risk_aspects"), errors="coerce").fillna(0)
    actions = pd.to_numeric(df.get("actions"), errors="coerce").fillna(0)
    atl_actions = pd.to_numeric(df.get("atl_actions"), errors="coerce").fillna(0)

    # ── Input depth: how many useful fields are actually populated ──
    base_fields = [c for c in INPUT_DEPTH_BASE_FIELDS if c in df.columns]
    optional_fields = [c for c in INPUT_DEPTH_OPTIONAL_FIELDS if c in df.columns]
    numeric_fields = [c for c in INPUT_DEPTH_NUMERIC_FIELDS if c in df.columns]

    if base_fields:
        df["_base_input_count"] = sum(df[col].map(_is_meaningful_text).astype(int) for col in base_fields)
    else:
        df["_base_input_count"] = 0

    if optional_fields:
        df["_optional_input_count"] = sum(df[col].map(_is_meaningful_text).astype(int) for col in optional_fields)
    else:
        df["_optional_input_count"] = 0

    numeric_presence = []
    for col in numeric_fields:
        vals = pd.to_numeric(df[col], errors="coerce").fillna(0)
        # NOTE(review): both branches are identical (> 0 test); the
        # total_questions special case appears to be vestigial.
        if col == "total_questions":
            numeric_presence.append(vals.gt(0).astype(int))
        else:
            numeric_presence.append(vals.gt(0).astype(int))
    df["_numeric_input_count"] = sum(numeric_presence) if numeric_presence else 0

    # NOTE(review): max_points is computed but never used — weighted_max
    # below is what actually normalises the score.
    max_points = max(1, len(base_fields) + len(optional_fields) + len(numeric_fields))
    weighted_points = (
        df["_base_input_count"] * 1.0
        + df["_optional_input_count"] * 1.2
        + df["_numeric_input_count"] * 1.0
    )
    weighted_max = max(
        1.0,
        len(base_fields) * 1.0 + len(optional_fields) * 1.2 + len(numeric_fields) * 1.0,
    )
    df["input_depth_score"] = np.clip((weighted_points / weighted_max) * 100.0, 0, 100).round(1)
    df["input_depth_band"] = np.select(
        [
            df["input_depth_score"] >= 60,
            df["input_depth_score"] >= 40,
            df["input_depth_score"] >= 20,
        ],
        ["Rich", "Balanced", "Light"],
        default="Sparse",
    )

    # ── Boolean quality signals ──
    df["_follow_up_signal"] = (
        (actions + atl_actions > 0)
        | df["_text_blob"].str.lower().str.contains("follow up|review|action|close out|escalat", regex=True, na=False)
    )
    df["_risk_signal"] = (
        (at_risk > 0)
        | df["_theme_count"].gt(0)
        | df["_text_blob"].str.lower().str.contains("critical risk|hazard|unsafe|control", regex=True, na=False)
    )
    # Long literal column name comes straight from the source workbook.
    df["_critical_control_signal"] = df.get(
        "Was a critical risk identified and controls verified as effective and in place?",
        pd.Series(index=df.index, dtype="object"),
    ).astype(str).str.lower().str.contains("yes|effective|verified", regex=True, na=False)

    # Near-duplicate narratives: same normalised text appearing 3+ times,
    # ignoring very short entries.
    norm_text = (
        df["_text_blob"].str.lower()
        .str.replace(r"[^a-z0-9 ]", " ", regex=True)
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
    )
    freq = norm_text[norm_text.ne("")].value_counts()
    df["_duplicate_flag"] = norm_text.map(freq).fillna(0).ge(3) & df["_word_count"].ge(5)

    # ── Score components (points are hand-tuned weights, capped at 100) ──
    richness = (
        np.where(df["_word_count"] >= 35, 22,
        np.where(df["_word_count"] >= 20, 18,
        np.where(df["_word_count"] >= 10, 12,
        np.where(df["_word_count"] >= 5, 6, 0))))
    )
    specificity = (
        np.where(df["_unique_words"] >= 18, 10, np.where(df["_unique_words"] >= 10, 6, 2))
        + np.where(df["_contains_number"], 4, 0)
        + np.where(df["_theme_count"] >= 2, 4, np.where(df["_theme_count"] == 1, 2, 0))
    )
    action_score = (
        np.where(actions + atl_actions >= 2, 12, np.where(actions + atl_actions == 1, 8, 0))
        + np.where(df["_action_words"] >= 2, 6, np.where(df["_action_words"] == 1, 3, 0))
    )
    learning_score = (
        np.where(df["_learning_words"] >= 2, 10, np.where(df["_learning_words"] == 1, 6, 0))
        + np.where(df["_text_blob"].str.lower().str.contains("best practice|lesson|learning|feedback", regex=True, na=False), 4, 0)
    )
    risk_score = (
        np.where(df["_risk_signal"], 8, 0)
        + np.where(at_risk >= 2, 6, np.where(at_risk == 1, 3, 0))
        + np.where(df["_critical_control_signal"], 4, 0)
        # replace(0, nan) avoids dividing by zero questions
        + np.where((total_q > 0) & ((at_risk / total_q.replace(0, np.nan)).fillna(0) >= 0.1), 2, 0)
    )
    follow_up_score = (
        np.where(df["_follow_up_signal"], 8, 0)
        + np.where(df["_text_blob"].str.lower().str.contains("close out|owner|due|monitor", regex=True, na=False), 4, 0)
    )
    penalty = (
        np.where(df["_generic_flag"], 10, 0)
        + np.where(df["_duplicate_flag"], 8, 0)
        + np.where((df["_word_count"] <= 6) & (~df["_follow_up_signal"]) & (~df["_risk_signal"]), 8, 0)
    )

    df["quality_score"] = np.clip(
        richness + specificity + action_score + learning_score + risk_score + follow_up_score - penalty,
        0, 100,
    ).astype(int)

    # Derived flags against the configured score bands.
    df["meaningful_flag"] = df["quality_score"] >= QUALITY_SCORE_BANDS["meaningful"]
    df["high_value_flag"] = df["quality_score"] >= QUALITY_SCORE_BANDS["high_value"]
    df["shallow_flag"] = df["quality_score"] <= QUALITY_SCORE_BANDS["shallow"]
    df["reactive_flag"] = (
        (df["_reactive_words"] > df["_preventive_words"])
        | ((actions + atl_actions > 0) & at_risk.gt(0))
    )
    df["preventive_flag"] = (
        (df["_preventive_words"] >= df["_reactive_words"])
        & df["_risk_signal"]
        & ~df["shallow_flag"]
    )
    df["repetitive_flag"] = df["_duplicate_flag"]

    def _band(score: int) -> str:
        # Label order matters: high-value wins over meaningful; shallow only
        # applies below the meaningful threshold; everything else is "Mixed".
        if score >= QUALITY_SCORE_BANDS["high_value"]:
            return "High value"
        if score >= QUALITY_SCORE_BANDS["meaningful"]:
            return "Meaningful"
        if score <= QUALITY_SCORE_BANDS["shallow"]:
            return "Shallow"
        return "Mixed"

    df["quality_band"] = df["quality_score"].map(_band)
    return df
round(_safe_pct(df["repetitive_flag"].sum(), len(df)), 1), + "follow_up_pct": round(_safe_pct(df["_follow_up_signal"].sum(), len(df)), 1), + "risk_signal_pct": round(_safe_pct(df["_risk_signal"].sum(), len(df)), 1), + "rich_input_pct": round(_safe_pct((df["input_depth_band"] == "Rich").sum(), len(df)), 1), + } + + +def _summarise_theme_trend(df: pd.DataFrame, recent_months: int = 6) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + if df.empty: + return [], [] + + theme_rows: list[dict[str, Any]] = [] + for _, row in df.iterrows(): + blob = row.get("_text_blob", "") + matches = _theme_matches(blob) + if not matches: + module_name = _normalise_text(row.get("module_name", "")) + if module_name: + matches = {module_name} + for theme in matches: + theme_rows.append({"year_month": row["year_month"], "theme": theme}) + + if not theme_rows: + return [], [] + + theme_df = pd.DataFrame(theme_rows) + monthly = ( + theme_df.groupby(["theme", "year_month"]).size() + .unstack(fill_value=0) + .sort_index(axis=1) + ) + recent_cols = monthly.columns[-recent_months:] + prior_cols = monthly.columns[-(recent_months * 2):-recent_months] + if len(recent_cols) == 0: + return [], [] + + rows: list[dict[str, Any]] = [] + for theme, vals in monthly.iterrows(): + recent_avg = float(vals[recent_cols].mean()) + prior_avg = float(vals[prior_cols].mean()) if len(prior_cols) else 0.0 + delta = recent_avg - prior_avg + if delta == 0: + continue + rows.append({ + "theme": str(theme), + "recent_avg": round(recent_avg, 2), + "prior_avg": round(prior_avg, 2), + "delta": round(delta, 2), + }) + + rising = sorted([r for r in rows if r["delta"] > 0], key=lambda r: (-r["delta"], -r["recent_avg"]))[:6] + declining = sorted([r for r in rows if r["delta"] < 0], key=lambda r: (r["delta"], -r["recent_avg"]))[:6] + return rising, declining + + +def _input_depth_insights(df: pd.DataFrame) -> dict[str, Any]: + if df.empty or "input_depth_score" not in df.columns: + return { + "correlation": None, + 
"by_band": [], + "note": "No input-depth insight available.", + } + + corr = None + if df["input_depth_score"].nunique() > 1 and df["quality_score"].nunique() > 1: + corr = round(float(df["input_depth_score"].corr(df["quality_score"])), 2) + + band_order = ["Sparse", "Light", "Balanced", "Rich"] + grouped = ( + df.groupby("input_depth_band") + .agg( + count=("quality_score", "size"), + avg_quality=("quality_score", "mean"), + meaningful_pct=("meaningful_flag", lambda s: _safe_pct(s.sum(), len(s))), + high_value_pct=("high_value_flag", lambda s: _safe_pct(s.sum(), len(s))), + shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))), + avg_input_depth=("input_depth_score", "mean"), + ) + .reset_index() + ) + grouped["band_order"] = grouped["input_depth_band"].map({b: i for i, b in enumerate(band_order)}) + grouped = grouped.sort_values("band_order") + by_band = [ + { + "band": r["input_depth_band"], + "count": int(r["count"]), + "avg_input_depth": round(float(r["avg_input_depth"]), 1), + "avg_quality": round(float(r["avg_quality"]), 1), + "meaningful_pct": round(float(r["meaningful_pct"]), 1), + "high_value_pct": round(float(r["high_value_pct"]), 1), + "shallow_pct": round(float(r["shallow_pct"]), 1), + } + for _, r in grouped.iterrows() + ] + + note = "Input depth appears usable as a supporting quality metric." + if corr is None: + note = "Input depth could not be tested reliably against quality because there was not enough variation." + elif corr < 0.25: + note = "Input depth is only weakly aligned with overall quality, so it should remain a secondary metric." + elif corr < 0.5: + note = "Input depth is moderately aligned with overall quality and is useful as a supporting metric." 
def _analyse_two_year_trends(
    se: pd.DataFrame,
    llc: pd.DataFrame,
    events: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Build a rolling two-year Safety Energy trend and quality view.

    Counts come from Safety Energy. Quality is inferred through practical
    proxies such as text richness, specificity, action/follow-up signals,
    hazard recognition, and repeated generic entries.

    The window always ends on the latest Safety Energy record; *start_date*
    is only echoed back as ``requested_start_date``. Returns a dict of
    monthly/quarterly mixes, per-activity-type insights, theme trends,
    narrative summaries, plus the working frames under private keys
    ``_quality_df`` and ``_llc_window``. Early-returns a ``{"note": ...}``
    dict when there is no usable data.
    """
    if se.empty:
        return {"note": "No Safety Energy data available for two-year trend analysis."}

    # ── Clamp all three datasets to the rolling window ──
    max_date = se["date"].max()
    window_start = _pick_window_start(max_date)
    requested_start = pd.Timestamp(start_date)
    se_window = se[(se["date"] >= window_start) & (se["date"] <= max_date)].copy()
    llc_window = llc[(llc["date"] >= window_start) & (llc["date"] <= max_date)].copy()
    events_window = events[(events["date"] >= window_start) & (events["date"] <= max_date)].copy()

    if se_window.empty:
        return {"note": "No Safety Energy records fall within the rolling two-year window."}

    quality_df = _build_quality_frame(se_window)
    # Full period ranges so months/quarters with zero activity still appear.
    all_months = pd.period_range(
        quality_df["date"].min().to_period("M"),
        quality_df["date"].max().to_period("M"),
        freq="M",
    )
    all_quarters = pd.period_range(
        quality_df["date"].min().to_period("Q"),
        quality_df["date"].max().to_period("Q"),
        freq="Q",
    )

    monthly_counts = (
        quality_df.groupby(["year_month", "activity_type"]).size()
        .unstack(fill_value=0)
        .reindex(all_months, fill_value=0)
    )
    quarterly_counts = (
        quality_df.assign(year_quarter=quality_df["date"].dt.to_period("Q"))
        .groupby(["year_quarter", "activity_type"]).size()
        .unstack(fill_value=0)
        .reindex(all_quarters, fill_value=0)
    )
    monthly_quality = (
        quality_df.groupby(["year_month", "activity_type"])["quality_score"].mean()
        .unstack(fill_value=np.nan)
        .reindex(all_months)
    )

    activity_insights: dict[str, Any] = {}
    quality_rows: list[dict[str, Any]] = []
    bu_snapshots: dict[str, list[dict[str, Any]]] = {}
    low_value_units: list[dict[str, Any]] = []
    depth_insights_by_type: dict[str, Any] = {}

    # ── Per activity type (CCC / OCC / LLC etc.) ──
    for atype in LEADING_ACTIVITY_TYPES:
        sub = quality_df[quality_df["activity_type"] == atype].copy()
        if sub.empty:
            continue

        module_topics = _top_dict(sub.get("module_name", pd.Series(dtype="object")), 8)
        text_topics = _top_dict(sub.get("llc_topic", pd.Series(dtype="object")), 8)
        # NOTE(review): sub_events is computed but never used below — either
        # wire it into the insight dict or remove it.
        sub_events = (
            events_window[events_window["business_unit"].isin(sub["business_unit"].dropna().unique())]
            if "business_unit" in events_window.columns else pd.DataFrame()
        )
        summary = _summarise_quality_slice(sub)
        depth_insights = _input_depth_insights(sub)
        summary.update({
            "top_modules": module_topics,
            "top_topics": text_topics,
            "avg_at_risk": round(float(pd.to_numeric(sub.get("at_risk_aspects"), errors="coerce").fillna(0).mean()), 2),
            "avg_actions": round(float(pd.to_numeric(sub.get("actions"), errors="coerce").fillna(0).mean()), 2),
        })
        depth_insights_by_type[atype] = depth_insights

        # Business-unit leaderboard (>= 20 records to dampen small-sample noise).
        bu_rows: list[dict[str, Any]] = []
        if "business_unit" in sub.columns:
            grouped = (
                sub.groupby("business_unit")
                .agg(
                    count=("quality_score", "size"),
                    avg_quality=("quality_score", "mean"),
                    shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))),
                    high_value_pct=("high_value_flag", lambda s: _safe_pct(s.sum(), len(s))),
                    repetitive_pct=("repetitive_flag", lambda s: _safe_pct(s.sum(), len(s))),
                )
                .reset_index()
            )
            grouped = grouped[grouped["count"] >= 20].sort_values(["avg_quality", "count"], ascending=[False, False])
            bu_rows = [
                {
                    "business_unit": r["business_unit"],
                    "count": int(r["count"]),
                    "avg_quality": round(float(r["avg_quality"]), 1),
                    "shallow_pct": round(float(r["shallow_pct"]), 1),
                    "high_value_pct": round(float(r["high_value_pct"]), 1),
                    "repetitive_pct": round(float(r["repetitive_pct"]), 1),
                }
                for _, r in grouped.iterrows()
            ]
        bu_snapshots[atype] = bu_rows[:8]
        # High-volume / low-value pattern: busy units whose entries read shallow.
        for row in bu_rows:
            if row["count"] >= 30 and row["shallow_pct"] >= 45:
                low_value_units.append({
                    "activity_type": atype,
                    "business_unit": row["business_unit"],
                    "count": row["count"],
                    "avg_quality": row["avg_quality"],
                    "shallow_pct": row["shallow_pct"],
                })

        quality_rows.append({
            "activity_type": atype,
            "count": summary["count"],
            "avg_quality": summary["avg_quality"],
            "avg_input_depth": summary["avg_input_depth"],
            "meaningful_pct": summary["meaningful_pct"],
            "high_value_pct": summary["high_value_pct"],
            "shallow_pct": summary["shallow_pct"],
            "preventive_pct": summary["preventive_pct"],
            "reactive_pct": summary["reactive_pct"],
            "repetitive_pct": summary["repetitive_pct"],
            "follow_up_pct": summary["follow_up_pct"],
            "rich_input_pct": summary["rich_input_pct"],
        })

        # Year-over-year movement (last full year vs the one before).
        yoy = {}
        if len(sub["year"].dropna().unique()) >= 2:
            yearly = sub.groupby("year").agg(
                count=("quality_score", "size"),
                quality=("quality_score", "mean"),
                meaningful=("meaningful_flag", "mean"),
            ).sort_index()
            if len(yearly) >= 2:
                prev = yearly.iloc[-2]
                curr = yearly.iloc[-1]
                yoy = {
                    "count_change_pct": round(_safe_pct(curr["count"] - prev["count"], prev["count"]), 1),
                    "quality_change": round(float(curr["quality"] - prev["quality"]), 1),
                    "meaningful_change_pct": round((float(curr["meaningful"]) - float(prev["meaningful"])) * 100, 1),
                }

        activity_insights[atype] = {
            **summary,
            "top_modules": module_topics,
            "top_topics": text_topics,
            "business_units": bu_rows,
            "input_depth": depth_insights,
            "yoy": yoy,
        }

    # ── Theme frequency across the whole window ──
    overall_themes = Counter()
    for blob in quality_df["_text_blob"]:
        matches = _theme_matches(blob)
        for theme in matches:
            overall_themes[theme] += 1

    ccc_df = quality_df[quality_df["activity_type"] == "CCC"].copy()
    ccc_rising, ccc_declining = _summarise_theme_trend(ccc_df)
    overall_rising, overall_declining = _summarise_theme_trend(quality_df)

    high_volume_low_value = sorted(
        low_value_units,
        key=lambda r: (-r["count"], -r["shallow_pct"], r["avg_quality"]),
    )[:8]

    # Most-repeated CCC modules with their quality profile.
    recurring_modules: list[dict[str, Any]] = []
    if "module_name" in ccc_df.columns:
        module_summary = (
            ccc_df.groupby("module_name")
            .agg(
                count=("quality_score", "size"),
                avg_quality=("quality_score", "mean"),
                repetitive_pct=("repetitive_flag", lambda s: _safe_pct(s.sum(), len(s))),
                shallow_pct=("shallow_flag", lambda s: _safe_pct(s.sum(), len(s))),
            )
            .reset_index()
            .sort_values("count", ascending=False)
        )
        recurring_modules = [
            {
                "module_name": r["module_name"],
                "count": int(r["count"]),
                "avg_quality": round(float(r["avg_quality"]), 1),
                "repetitive_pct": round(float(r["repetitive_pct"]), 1),
                "shallow_pct": round(float(r["shallow_pct"]), 1),
            }
            for _, r in module_summary.head(10).iterrows()
        ]

    # ── Report-ready monthly / quarterly rows (one dict per period) ──
    monthly_mix = [
        {
            "period": str(period),
            **{atype: int(monthly_counts.loc[period, atype]) if atype in monthly_counts.columns else 0 for atype in LEADING_ACTIVITY_TYPES},
        }
        for period in all_months
    ]

    quality_monthly_rows = [
        {
            "period": str(period),
            **{
                atype: round(float(monthly_quality.loc[period, atype]), 1)
                if atype in monthly_quality.columns and pd.notna(monthly_quality.loc[period, atype]) else None
                for atype in LEADING_ACTIVITY_TYPES
            },
        }
        for period in all_months
    ]

    quarter_rows = [
        {
            "period": str(period),
            **{atype: int(quarterly_counts.loc[period, atype]) if atype in quarterly_counts.columns else 0 for atype in LEADING_ACTIVITY_TYPES},
        }
        for period in all_quarters
    ]

    seasonality = (
        quality_df.assign(month_name=quality_df["date"].dt.month_name())
        .groupby("month_name").size().sort_values(ascending=False)
    )
    overall_input_depth = _input_depth_insights(quality_df)

    # ── Narrative summaries for the report's executive section ──
    executive_summary: list[str] = []
    ccc_summary = activity_insights.get("CCC", {})
    occ_summary = activity_insights.get("OCC", {})
    llc_summary = activity_insights.get("LLC", {})

    if ccc_summary:
        executive_summary.append(
            f"CCCs averaged a quality score of {ccc_summary.get('avg_quality', 0):.1f}/100 over the last "
            f"{len(all_months)} months, with {ccc_summary.get('shallow_pct', 0):.1f}% assessed as shallow "
            f"and {ccc_summary.get('high_value_pct', 0):.1f}% assessed as high value."
        )
    if high_volume_low_value:
        hv = high_volume_low_value[0]
        executive_summary.append(
            f"{hv['business_unit']} shows a high-volume / low-value pattern in {hv['activity_type']} activity: "
            f"{hv['count']} records with average quality {hv['avg_quality']:.1f} and {hv['shallow_pct']:.1f}% shallow entries."
        )
    if overall_rising:
        executive_summary.append(
            f"Emerging Safety Energy themes in the recent six months include "
            f"{', '.join(r['theme'] for r in overall_rising[:3])}."
        )
    if llc_summary and occ_summary:
        stronger = "LLC" if llc_summary.get("avg_quality", 0) >= occ_summary.get("avg_quality", 0) else "OCC"
        executive_summary.append(
            f"{stronger} records currently show the strongest overall documentation quality among the three leading activity types."
        )
    if overall_input_depth.get("correlation") is not None:
        executive_summary.append(
            f"Input depth and quality are correlated at r = {overall_input_depth['correlation']:.2f}, indicating that fuller records are a useful supporting signal for activity quality."
        )

    leadership_focus: list[str] = []
    if ccc_summary.get("repetitive_pct", 0) >= 20:
        leadership_focus.append(
            "CCC records show a material level of repeated or duplicated wording, suggesting some checks may be drifting toward compliance-only completion."
        )
    if ccc_declining:
        leadership_focus.append(
            f"CCC focus on {', '.join(item['theme'] for item in ccc_declining[:3])} has reduced in the recent six months; confirm this is intentional rather than a blind spot."
        )
    if overall_declining:
        leadership_focus.append(
            f"Previously visible themes such as {', '.join(item['theme'] for item in overall_declining[:3])} are appearing less often in recorded activity narratives."
        )
    if ccc_summary.get("follow_up_pct", 0) < 35:
        leadership_focus.append(
            "A relatively low share of CCCs contain clear follow-up or close-out signals, which weakens the evidence that issues identified in checks are being converted into learning and action."
        )
    if overall_input_depth.get("correlation") is not None and overall_input_depth["correlation"] >= 0.4:
        leadership_focus.append(
            "Rows with richer input depth are materially more likely to read as meaningful records, so populated-field depth can be used as a practical early warning metric for declining quality."
        )
    if not leadership_focus:
        leadership_focus.append(
            "No dominant low-value pattern was detected across the full two-year window, but monthly quality should still be monitored for slippage."
        )

    recommendations = [
        "Use CCC quality, not just CCC count, as a leadership KPI. Track shallow-entry rate, follow-up rate, and repeated wording monthly.",
        "Review the highest-volume low-value Business Units with their leaders and sample the underlying records to confirm whether quality concerns are real or data-entry related.",
        "Push recurring CCC/OCC themes that show little improvement into board-level focus areas where repeated exposure is visible but learning evidence is weak.",
    ]

    return {
        "window_start": window_start.strftime("%Y-%m-%d"),
        "window_end": max_date.strftime("%Y-%m-%d"),
        "window_months": int(len(all_months)),
        "requested_start_date": requested_start.strftime("%Y-%m-%d"),
        "monthly_mix": monthly_mix,
        "quarterly_mix": quarter_rows,
        "monthly_quality": quality_monthly_rows,
        "quality_by_type": quality_rows,
        "activity_insights": activity_insights,
        "input_depth": overall_input_depth,
        "input_depth_by_type": depth_insights_by_type,
        "top_themes": dict(overall_themes.most_common(10)),
        "rising_themes": overall_rising,
        "declining_themes": overall_declining,
        "ccc_rising_themes": ccc_rising,
        "ccc_declining_themes": ccc_declining,
        "ccc_recurring_modules": recurring_modules,
        "high_volume_low_value": high_volume_low_value,
        "bu_quality_snapshots": bu_snapshots,
        "seasonality": {k: int(v) for k, v in seasonality.head(6).items()},
        "executive_summary": executive_summary,
        "leadership_focus": leadership_focus,
        "recommendations": recommendations,
        "proxy_note": (
            "Quality is inferred using practical proxies: richness and specificity of text, risk recognition, "
            "action/follow-up language, evidence of learning, input depth across useful fields, and penalties for generic or repeated wording. "
            "These scores indicate likely value, not definitive assurance."
        ),
        "note": (
            "The deeper Safety Energy analysis uses a rolling two-year window ending on the latest Safety Energy record. "
            "Counts come from Safety_Energy.xlsx; LLC_Data is used separately elsewhere for richer LLC theme detail."
        ),
        # Private working frames for downstream chart generation.
        "_quality_df": quality_df,
        "_llc_window": llc_window,
    }
+ ), + "_quality_df": quality_df, + "_llc_window": llc_window, + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Data quality profiling +# ───────────────────────────────────────────────────────────────────────────── + +def _profile_data_quality( + events: pd.DataFrame, + se: pd.DataFrame, + llc: pd.DataFrame, +) -> dict[str, Any]: + """ + Summarise row counts, date coverage, and null rates for key fields. + """ + def _date_range(df: pd.DataFrame) -> tuple[str, str]: + mn = df["date"].min() + mx = df["date"].max() + return ( + mn.strftime("%d %b %Y") if pd.notna(mn) else "N/A", + mx.strftime("%d %b %Y") if pd.notna(mx) else "N/A", + ) + + def _null_pct(df: pd.DataFrame, col: str) -> str: + if col not in df.columns: + return "N/A" + return f"{df[col].isna().mean() * 100:.1f}%" + + ev_range = _date_range(events) + se_range = _date_range(se) + llc_range = _date_range(llc) + + return { + "events": { + "rows": len(events), + "date_from": ev_range[0], + "date_to": ev_range[1], + "null_event_type": _null_pct(events, "event_type"), + "null_consequence": _null_pct(events, "consequence"), + "null_business_unit": _null_pct(events, "business_unit"), + "null_root_cause": _null_pct(events, "root_cause_cat"), + "duplicate_ids": int(events["EventID"].duplicated().sum()) if "EventID" in events.columns else "N/A", + }, + "safety_energy": { + "rows": len(se), + "date_from": se_range[0], + "date_to": se_range[1], + "type_breakdown": se["activity_type"].value_counts().to_dict() if "activity_type" in se else {}, + "null_leader": _null_pct(se, "leader"), + "null_bu": _null_pct(se, "business_unit"), + }, + "llc": { + "rows": len(llc), + "date_from": llc_range[0], + "date_to": llc_range[1], + "null_topic": _null_pct(llc, "topic"), + "null_leader": _null_pct(llc, "leader"), + }, + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Events analysis +# 
───────────────────────────────────────────────────────────────────────────── + +def _parse_time_to_hour(value: Any) -> Optional[int]: + if pd.isna(value): + return None + text = str(value).strip() + if not text or text.lower() == "nan": + return None + parsed = pd.to_datetime(text, errors="coerce") + if pd.notna(parsed): + return int(parsed.hour) + match = re.search(r"(\d{1,2}):(\d{2})", text) + if match: + return int(match.group(1)) + return None + + +def _time_bucket(hour: Optional[int]) -> str: + if hour is None: + return "Unknown" + if 0 <= hour < 6: + return "Night (00:00-05:59)" + if 6 <= hour < 12: + return "Morning (06:00-11:59)" + if 12 <= hour < 18: + return "Afternoon (12:00-17:59)" + return "Evening (18:00-23:59)" + + +def _analyse_events( + events: pd.DataFrame, + start_date: str, + split_date: str, + pd1_name: str, + pd2_name: str, +) -> dict[str, Any]: + """Whole-of-period events view with serious-event, timing, and MVE insight.""" + + df = events[events["date"] >= pd.Timestamp(start_date)].copy() + if df.empty: + return {"total": 0, "_df": df} + + def _pct(n: int, total: int) -> str: + return f"{n/max(total,1)*100:.1f}%" + + def _cons_counts(sub: pd.DataFrame) -> dict[str, int]: + if "consequence" not in sub: + return {} + return {c: int((sub["consequence"] == c).sum()) for c in CONSEQUENCE_ORDER} + + def _crp_counts(sub: pd.DataFrame) -> dict[str, int]: + if "crp" not in sub: + return {} + vc = sub["crp"].dropna().astype(str).str.strip().value_counts() + vc = vc[~vc.index.isin(["None Identified", "Under Investigation", "nan", ""])] + return vc.head(10).to_dict() + + monthly_all = ( + df.groupby("year_month").size() + .reindex( + pd.period_range(df["date"].min().to_period("M"), df["date"].max().to_period("M"), freq="M"), + fill_value=0, + ) + ) + months = max(1, len(monthly_all)) + injury_class = df["injury_class"].value_counts().to_dict() if "injury_class" in df.columns else {} + serious = df[df["consequence"].isin(CONSEQUENCE_SERIOUS)].copy() if 
"consequence" in df.columns else df.iloc[0:0] + + if "Time of Event" in serious.columns: + serious["_event_hour"] = serious["Time of Event"].map(_parse_time_to_hour) + serious["_time_bucket"] = serious["_event_hour"].map(_time_bucket) + time_bucket_counts = serious["_time_bucket"].value_counts().to_dict() + time_hour_counts = serious["_event_hour"].dropna().astype(int).value_counts().sort_index().to_dict() + timed_serious = int(serious["_event_hour"].notna().sum()) + else: + time_bucket_counts = {} + time_hour_counts = {} + timed_serious = 0 + + motor = ( + df[df["event_type"].astype(str).str.contains("motor|vehicle|mva|traffic", case=False, na=False)].copy() + if "event_type" in df.columns else df.iloc[0:0] + ) + serious_motor = motor[motor["consequence"].isin(CONSEQUENCE_SERIOUS)].copy() if "consequence" in motor.columns else motor.iloc[0:0] + + return { + "total": len(df), + "date_from": df["date"].min().strftime("%d %b %Y"), + "date_to": df["date"].max().strftime("%d %b %Y"), + "months": months, + "events_per_month": round(len(df) / months, 1), + "serious_count": int(len(serious)), + "serious_pct": _pct(len(serious), len(df)), + "lti_count": int(injury_class.get("Lost Time Injury", 0)), + "fai_count": int(injury_class.get("First Aid Treatment", 0)), + "event_type_counts": df["event_type"].value_counts().to_dict() if "event_type" in df.columns else {}, + "consequence_counts": _cons_counts(df), + "crp_counts": _crp_counts(df), + "root_cause_counts": df["root_cause_cat"].value_counts().head(10).to_dict() if "root_cause_cat" in df.columns else {}, + "serious_projects": serious["project"].value_counts().head(8).to_dict() if "project" in serious.columns else {}, + "serious_locations": serious["location"].value_counts().head(8).to_dict() if "location" in serious.columns else {}, + "serious_bus": serious["business_unit"].value_counts().head(8).to_dict() if "business_unit" in serious.columns else {}, + "serious_time_buckets": time_bucket_counts, + "serious_time_hours": 
{str(k): int(v) for k, v in time_hour_counts.items()}, + "serious_time_coverage_pct": round(_safe_pct(timed_serious, len(serious)), 1) if len(serious) else 0.0, + "motor_vehicle": { + "count": int(len(motor)), + "pct_total": round(_safe_pct(len(motor), len(df)), 1), + "serious_count": int(len(serious_motor)), + "serious_pct_within_mve": round(_safe_pct(len(serious_motor), len(motor)), 1) if len(motor) else 0.0, + "consequence_counts": _cons_counts(motor), + "top_projects": motor["project"].value_counts().head(8).to_dict() if "project" in motor.columns else {}, + "top_locations": motor["location"].value_counts().head(8).to_dict() if "location" in motor.columns else {}, + "road_types": _top_dict(motor.get("Road Type", pd.Series(dtype="object")), 6), + "conditions": _top_dict(motor.get("Road Conditions", pd.Series(dtype="object")), 6), + "vehicle_types": _top_dict(motor.get("Type of vehicle involved", pd.Series(dtype="object")), 6), + }, + "monthly_all": {str(k): int(v) for k, v in monthly_all.items()}, + "_df": df, + "_serious": serious, + "_motor": motor, + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Leading activity analysis +# ───────────────────────────────────────────────────────────────────────────── + +def _analyse_leading( + se: pd.DataFrame, + llc: pd.DataFrame, + start_date: str, +) -> dict[str, Any]: + """ + Summarise leading activities from Safety Energy + LLC Data. + + LLC_Data is used for its richer free-text (topic, CRP focus, observations). + Safety Energy provides the authoritative counts for all three activity types. 
def _analyse_leading(
    se: pd.DataFrame,
    llc: pd.DataFrame,
    start_date: str,
) -> dict[str, Any]:
    """
    Summarise leading activities from Safety Energy + LLC Data.

    LLC_Data is used for its richer free-text (topic, CRP focus, observations).
    Safety Energy provides the authoritative counts for all three activity types.
    """
    se_f = se[se["date"] >= pd.Timestamp(start_date)].copy()

    # FIX: pd.period_range(NaT, NaT) raises when no Safety Energy rows fall in
    # the window; build an empty month index in that case so every downstream
    # aggregate degrades to empty/zero instead of crashing.
    if se_f.empty:
        all_months = pd.PeriodIndex([], freq="M")
    else:
        all_months = pd.period_range(se_f["date"].min().to_period("M"),
                                     se_f["date"].max().to_period("M"), freq="M")

    # Monthly counts by activity type
    monthly_by_type: dict[str, dict[str, int]] = {}
    for atype in LEADING_ACTIVITY_TYPES:
        sub = se_f[se_f["activity_type"] == atype]
        monthly = sub.groupby("year_month").size().reindex(all_months, fill_value=0)
        monthly_by_type[atype] = {str(k): int(v) for k, v in monthly.items()}

    # BU breakdown
    bu_by_type: dict[str, dict[str, int]] = {}
    for atype in LEADING_ACTIVITY_TYPES:
        sub = se_f[se_f["activity_type"] == atype]
        if "business_unit" in sub:
            bu_by_type[atype] = sub["business_unit"].value_counts().to_dict()

    # Top leaders (LLC only, from LLC_Data for richer detail)
    llc_f = llc[llc["date"] >= pd.Timestamp(start_date)].copy()
    top_leaders: dict[str, int] = {}
    if "leader" in llc_f:
        top_leaders = (
            llc_f["leader"].dropna().value_counts()
            .head(15).to_dict()
        )

    # LLC topics
    top_topics: dict[str, int] = {}
    if "topic" in llc_f:
        top_topics = (
            llc_f["topic"].dropna()
            .str.strip()
            .value_counts()
            .head(15).to_dict()
        )

    # CRP focus areas in LLCs
    crp_focus: dict[str, int] = {}
    if "crp_focus" in llc_f:
        crp_focus = (
            llc_f["crp_focus"].dropna()
            .str.strip()
            .value_counts()
            .head(10).to_dict()
        )

    # At-risk flags from LLC_Data
    at_risk_total = 0
    if "at_risk_flag" in llc_f:
        at_risk_total = int(llc_f["at_risk_flag"].sum())

    # Overall totals
    totals = se_f["activity_type"].value_counts().to_dict()

    # Average at-risk aspects per activity
    avg_at_risk: dict[str, float] = {}
    if "at_risk_aspects" in se_f:
        for atype in LEADING_ACTIVITY_TYPES:
            sub = se_f[se_f["activity_type"] == atype]
            val = sub["at_risk_aspects"].mean()
            avg_at_risk[atype] = round(float(val), 2) if pd.notna(val) else 0.0

    # Monthly total (all types combined)
    monthly_total = (
        se_f.groupby("year_month").size()
        .reindex(all_months, fill_value=0)
    )

    # Trend direction: recent months vs a prior baseline.
    def _trend_dir(series: pd.Series) -> str:
        if len(series) < 4:
            return "insufficient data"
        recent = series.iloc[-min(6, len(series)):]
        # NOTE(review): max(0, -12) is always 0, so for >=12 months the
        # baseline is "everything except the last 6", not the prior 6 months
        # the naming suggests — behaviour preserved; confirm intent upstream.
        prior = series.iloc[max(0, -12):-6] if len(series) >= 12 else series.iloc[:max(1, len(series)-6)]
        if prior.mean() == 0:
            return "no prior baseline"
        change = (recent.mean() - prior.mean()) / prior.mean() * 100
        if change > 10:
            return f"increasing (+{change:.0f}%)"
        elif change < -10:
            return f"declining ({change:.0f}%)"
        return f"stable ({change:+.0f}%)"

    activity_trend = _trend_dir(monthly_total)

    return {
        "totals": totals,
        "monthly_by_type": monthly_by_type,
        "monthly_total": {str(k): int(v) for k, v in monthly_total.items()},
        "bu_by_type": bu_by_type,
        "top_leaders": top_leaders,
        "top_topics": top_topics,
        "crp_focus": crp_focus,
        "at_risk_total_llc": at_risk_total,
        "avg_at_risk": avg_at_risk,
        "activity_trend": activity_trend,
        "all_months": [str(m) for m in all_months],
        "_se_f": se_f,
        "_llc_f": llc_f,
    }
+ """ + ev_df = events_result.get("_df") + se_f = leading_result.get("_se_f") + + if ev_df is None or se_f is None or "business_unit" not in ev_df.columns: + return {"note": "Insufficient data for effectiveness analysis."} + + # BU-level: total leading activities vs total events + bu_activities = se_f.groupby("business_unit").size().rename("activities") + bu_events = ev_df.groupby("business_unit").size().rename("events") + + bu_table = pd.concat([bu_activities, bu_events], axis=1).fillna(0) + bu_table.columns = ["activities", "events"] + bu_table = bu_table[bu_table["activities"] > 0].sort_values("activities", ascending=False) + + # Monthly correlation: do more activities in month M associate with fewer events? + # Use a 1-month lag (activities in M, events in M+1) + monthly_acts = se_f.groupby("year_month").size() + monthly_events = ev_df.groupby("year_month").size() + + common_months = monthly_acts.index.intersection(monthly_events.index) + corr_value: Optional[float] = None + corr_note = "Insufficient overlapping months for correlation analysis." + + if len(common_months) >= CORR_MIN_MONTHS: + a_vals = monthly_acts.reindex(common_months, fill_value=0).values + e_vals = monthly_events.reindex(common_months, fill_value=0).values + if np.std(a_vals) > 0 and np.std(e_vals) > 0: + corr_value = float(np.corrcoef(a_vals, e_vals)[0, 1]) + direction = "positive" if corr_value > 0 else "negative" + strength = "weak" if abs(corr_value) < 0.3 else ("moderate" if abs(corr_value) < 0.6 else "strong") + corr_note = ( + f"A {strength} {direction} association (r = {corr_value:.2f}) was observed " + f"between monthly leading-activity counts and monthly event counts across " + f"{len(common_months)} overlapping months. " + + ("This may warrant further review — high activity volumes and high event rates " + "in the same periods could reflect reactive activity rather than prevention." 
+ if corr_value > 0.3 else + "A negative association is consistent with leading activities having a " + "preventive effect, though causation cannot be assumed from this data alone." + if corr_value < -0.3 else + "No strong directional association was identified.") + ) + + # BUs with high activity AND high events (possible reactive pattern) + high_both: list[str] = [] + high_acts_low_events: list[str] = [] + + if len(bu_table) >= 2: + act_median = bu_table["activities"].median() + evt_median = bu_table["events"].median() + for bu, row in bu_table.iterrows(): + if row["activities"] >= act_median and row["events"] >= evt_median: + high_both.append(str(bu)) + elif row["activities"] >= act_median and row["events"] < evt_median: + high_acts_low_events.append(str(bu)) + + return { + "bu_table": bu_table.reset_index().to_dict("records"), + "corr_value": corr_value, + "corr_note": corr_note, + "high_activity_high_events": high_both, + "high_activity_low_events": high_acts_low_events, + "note": ( + "Effectiveness analysis uses business unit-level and monthly aggregates. " + "All associations are indicative only — correlation does not imply causation." + ), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# At-risk behaviour analysis +# ───────────────────────────────────────────────────────────────────────────── + +def _extract_at_risk_themes( + events: pd.DataFrame, + se: pd.DataFrame, + llc: pd.DataFrame, + start_date: str, +) -> dict[str, Any]: + """ + Extract at-risk behaviour themes using keyword matching against free-text + fields in LLC_Data, Safety Energy, and Events. + + No cloud APIs; all processing is local. 
+ """ + ev_f = events[events["date"] >= pd.Timestamp(start_date)] + llc_f = llc[llc["date"] >= pd.Timestamp(start_date)] + se_f = se[se["date"] >= pd.Timestamp(start_date)] + + # Collect text blobs from each source + ev_texts = _collect_text(ev_f, ["brief_desc", "event_desc", "root_cause_cat"]) + llc_texts = _collect_text(llc_f, ["topic", "at_risk_obs", "crp_focus"]) + se_texts = _collect_text(se_f, ["llc_topic", "at_risk_obs"]) + + def _score(texts: list[str]) -> dict[str, int]: + counts: Counter = Counter() + for text in texts: + tl = text.lower() + for theme, keywords in AT_RISK_KEYWORDS.items(): + if any(kw in tl for kw in keywords): + counts[theme] += 1 + return dict(counts.most_common()) + + ev_themes = _score(ev_texts) + llc_themes = _score(llc_texts) + se_themes = _score(se_texts) + + # Combine: weight events × 2 (lagging = higher severity signal) + combined: Counter = Counter() + for theme, cnt in ev_themes.items(): + combined[theme] += cnt * 2 + for theme, cnt in llc_themes.items(): + combined[theme] += cnt + for theme, cnt in se_themes.items(): + combined[theme] += cnt + + # Alignment gap: themes prominent in events but absent in LLC discussions + llc_top = set(list(llc_themes.keys())[:5]) + events_top = set(list(ev_themes.keys())[:5]) + gap_themes = events_top - llc_top + + # Top LLC topics (free text) + top_llc_topics: dict[str, int] = {} + if "topic" in llc_f.columns: + top_llc_topics = llc_f["topic"].dropna().value_counts().head(10).to_dict() + + # CRP focus in LLCs + top_crp_focus: dict[str, int] = {} + if "crp_focus" in llc_f.columns: + top_crp_focus = llc_f["crp_focus"].dropna().value_counts().head(8).to_dict() + + return { + "event_themes": ev_themes, + "llc_themes": llc_themes, + "combined_themes": dict(combined.most_common(10)), + "gap_themes": list(gap_themes), + "top_llc_topics": top_llc_topics, + "top_crp_focus": top_crp_focus, + "note": ( + "Theme extraction uses keyword matching against free-text fields. 
" + "Results are indicative; manual review of underlying records is recommended " + "before drawing firm conclusions." + ), + } + + +def _collect_text(df: pd.DataFrame, cols: list[str]) -> list[str]: + """Collect non-null text entries from named columns.""" + texts = [] + for col in cols: + if col in df.columns: + texts.extend(df[col].dropna().astype(str).str.strip().tolist()) + return texts + + +def _compare_dimension( + events_df: pd.DataFrame, + se_df: pd.DataFrame, + dimension: str, + min_activities: int = 10, +) -> dict[str, Any]: + if dimension not in events_df.columns or dimension not in se_df.columns: + return {"table": [], "best": [], "watch": []} + + serious = events_df[events_df["consequence"].isin(CONSEQUENCE_SERIOUS)].copy() if "consequence" in events_df.columns else events_df.iloc[0:0] + activities = se_df.groupby(dimension).size().rename("activities") + events = events_df.groupby(dimension).size().rename("events") + serious_events = serious.groupby(dimension).size().rename("serious_events") + comp = pd.concat([activities, events, serious_events], axis=1).fillna(0) + if comp.empty: + return {"table": [], "best": [], "watch": []} + + comp = comp.astype(int) + comp["activity_event_ratio"] = comp.apply( + lambda r: round(r["activities"] / r["events"], 1) if r["events"] > 0 else None, + axis=1, + ) + + def _rows(df: pd.DataFrame, label: str) -> list[dict[str, Any]]: + rows = [] + for _, row in df.reset_index().iterrows(): + ratio = row["activity_event_ratio"] + rows.append({ + label: row[label], + "activities": int(row["activities"]), + "events": int(row["events"]), + "serious_events": int(row["serious_events"]), + "activity_event_ratio": None if pd.isna(ratio) else ratio, + }) + return rows + + best = ( + comp[comp["activities"] >= min_activities] + .sort_values(["serious_events", "events", "activities"], ascending=[True, True, False]) + .head(8) + ) + watch = comp.sort_values(["serious_events", "events", "activities"], ascending=[False, False, 
False]).head(8) + + return { + "table": _rows(comp.sort_values(["activities", "events"], ascending=[False, False]).head(25), dimension), + "best": _rows(best, dimension), + "watch": _rows(watch, dimension), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Safety Energy ↔ Events relationship analysis +# ───────────────────────────────────────────────────────────────────────────── + +def _analyse_se_events_relationship( + events: pd.DataFrame, + se: pd.DataFrame, + start_date: str, +) -> dict[str, Any]: + """ + Compare monthly Safety Energy activity levels against Events, overall and + by Business Unit. Uses cautious associative language throughout. + """ + ev_f = events[events["date"] >= pd.Timestamp(start_date)].copy() + se_f = se[se["date"] >= pd.Timestamp(start_date)].copy() + + # Build common month range + all_dates = pd.concat([ev_f["date"], se_f["date"]]) + if all_dates.empty: + return {"note": "No overlapping data to compare."} + + start = all_dates.min().to_period("M") + end = all_dates.max().to_period("M") + all_months = pd.period_range(start, end, freq="M") + + monthly_acts = se_f.groupby("year_month").size().reindex(all_months, fill_value=0) + monthly_events = ev_f.groupby("year_month").size().reindex(all_months, fill_value=0) + + # Trend divergence: periods where events spike but activities don't + spike_months: list[str] = [] + if len(monthly_events) >= 3: + ev_mean = monthly_events.mean() + ev_std = monthly_events.std() + for period, ev_count in monthly_events.items(): + if ev_count > ev_mean + ev_std: + act_count = monthly_acts.get(period, 0) + if act_count < monthly_acts.mean(): + spike_months.append(str(period)) + + # BU comparison table + bu_comp: list[dict] = [] + if "business_unit" in ev_f and "business_unit" in se_f: + bu_acts = se_f.groupby("business_unit").size().rename("activities") + bu_events = ev_f.groupby("business_unit").size().rename("events") + merged = pd.concat([bu_acts, bu_events], 
axis=1).fillna(0).astype(int) + + # Compute activity-to-event ratio where events > 0 + merged["ratio"] = merged.apply( + lambda r: round(r["activities"] / r["events"], 1) if r["events"] > 0 else None, + axis=1, + ) + bu_comp = merged.reset_index().rename(columns={"index": "business_unit"}).to_dict("records") + + # LLC topic alignment vs event root causes + llc_top_topics: list[str] = [] + ev_top_rc: list[str] = [] + if "topic" in se_f.columns: + llc_sub = se_f[se_f["activity_type"] == "LLC"] + llc_top_topics = llc_sub["llc_topic"].dropna().value_counts().head(5).index.tolist() if "llc_topic" in llc_sub else [] + if "root_cause_cat" in ev_f: + ev_top_rc = ev_f["root_cause_cat"].dropna().value_counts().head(5).index.tolist() + + return { + "monthly_acts": {str(k): int(v) for k, v in monthly_acts.items()}, + "monthly_events": {str(k): int(v) for k, v in monthly_events.items()}, + "spike_months": spike_months, + "bu_comparison": bu_comp, + "project_comparison": _compare_dimension(ev_f, se_f, "project", min_activities=12), + "location_comparison": _compare_dimension(ev_f, se_f, "location", min_activities=10), + "llc_top_topics": llc_top_topics, + "ev_top_rc": ev_top_rc, + "alignment_note": ( + "LLC topic focus and event root causes are compared to identify alignment gaps. " + "Where event root causes diverge from LLC discussion topics, this may indicate " + "that leading activity conversations are not yet targeting the highest-risk themes." + ), + "note": ( + "Monthly comparison covers periods where both datasets have data. " + "Short overlapping periods reduce the reliability of any trend observations. " + "This analysis is associative only." 
+ ), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Leader and BU focus areas +# ───────────────────────────────────────────────────────────────────────────── + +def _analyse_focus_areas( + events: pd.DataFrame, + se: pd.DataFrame, + llc: pd.DataFrame, + start_date: str, +) -> dict[str, Any]: + """ + Identify Business Units and leaders warranting leadership attention, + based on activity volumes, event rates, and declining trends. + """ + ev_f = events[events["date"] >= pd.Timestamp(start_date)] + se_f = se[se["date"] >= pd.Timestamp(start_date)] + llc_f = llc[llc["date"] >= pd.Timestamp(start_date)] + + # BU-level activity counts and event counts + bu_acts: dict[str, int] = {} + bu_evts: dict[str, int] = {} + if "business_unit" in se_f: + bu_acts = se_f.groupby("business_unit").size().to_dict() + if "business_unit" in ev_f: + bu_evts = ev_f.groupby("business_unit").size().to_dict() + + all_bus = sorted(set(list(bu_acts.keys()) + list(bu_evts.keys()))) + bu_summary = [ + { + "business_unit": bu, + "activities": bu_acts.get(bu, 0), + "events": bu_evts.get(bu, 0), + } + for bu in all_bus + ] + + # Most active leaders + leader_counts: dict[str, int] = {} + if "leader" in se_f: + leader_counts = ( + se_f["leader"].dropna().value_counts() + .head(20).to_dict() + ) + + # Leaders with < LEADER_MIN_ACTIVITIES (gap indicator) + low_activity_leaders: list[str] = [ + l for l, c in leader_counts.items() if c < LEADER_MIN_ACTIVITIES + ] + + # Declining BUs: compare first half vs second half of date range + declining_bus: list[str] = [] + if "business_unit" in se_f and len(se_f) > 0: + mid = se_f["date"].min() + (se_f["date"].max() - se_f["date"].min()) / 2 + for bu in all_bus: + sub = se_f[se_f["business_unit"] == bu] + if len(sub) < 4: + continue + early = len(sub[sub["date"] <= mid]) + late = len(sub[sub["date"] > mid]) + if late < early * 0.7: + declining_bus.append(bu) + + return { + "bu_summary": bu_summary, + "leader_counts": 
leader_counts, + "low_activity_leaders": low_activity_leaders, + "declining_bus": declining_bus, + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Chart generation +# ───────────────────────────────────────────────────────────────────────────── + +def _generate_charts( + events_res: dict, + leading_res: dict, + se_ev_res: dict, + at_risk_res: dict, + trends_res: dict, + output_dir: str, + pd1_name: str, + pd2_name: str, + split_date: str, +) -> dict[str, str]: + """Generate all charts and return a dict of name → file path.""" + _setup_style() + charts: dict[str, str] = {} + os.makedirs(output_dir, exist_ok=True) + + # ── 1. Events monthly trend ────────────────────────────────────────────── + try: + ev_df = events_res.get("_df") + if ev_df is not None and len(ev_df) > 0: + all_months = pd.period_range( + ev_df["date"].min().to_period("M"), + ev_df["date"].max().to_period("M"), freq="M", + ) + monthly = ev_df.groupby("year_month").size().reindex(all_months, fill_value=0) + x = range(len(all_months)) + labels = _month_labels(all_months) + + fig, ax = plt.subplots(figsize=(11, 4)) + vals = monthly.values + ax.bar(x, vals, color=DEEP_BLUE, width=0.72, alpha=0.9) + rolling = monthly.rolling(3, min_periods=1).mean().values + ax.plot(x, rolling, color=SKY_BLUE, linewidth=2.2, marker="o", markersize=3, label="3-month average") + + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8) + ax.set_title("Monthly Events", fontsize=14, + fontweight="bold", color=DEEP_BLUE) + ax.set_ylabel("Events") + ax.legend(loc="upper right", fontsize=9) + p = os.path.join(output_dir, "ch_events_monthly.png") + _save(fig, p) + charts["events_monthly"] = p + except Exception as e: + log.warning("Chart events_monthly failed: %s", e) + + # ── 2. 
Leading activities monthly trend (stacked area) ─────────────────── + try: + monthly_by_type = leading_res.get("monthly_by_type", {}) + all_months_str = leading_res.get("all_months", []) + if all_months_str and any(monthly_by_type.values()): + months_idx = [pd.Period(m) for m in all_months_str] + x = range(len(months_idx)) + labels = _month_labels(pd.PeriodIndex(months_idx)) + + fig, ax = plt.subplots(figsize=(11, 4)) + bottom = np.zeros(len(months_idx)) + for atype in LEADING_ACTIVITY_TYPES: + vals = np.array([monthly_by_type.get(atype, {}).get(m, 0) for m in all_months_str]) + ax.bar(x, vals, bottom=bottom, color=ACTIVITY_COLOURS[atype], + label=atype, width=0.8, alpha=0.9) + bottom += vals + + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8) + ax.set_title("Monthly Leading Activities (LLC / CCC / OCC)", + fontsize=14, fontweight="bold", color=DEEP_BLUE) + ax.set_ylabel("Count") + ax.legend(loc="upper right", fontsize=9) + p = os.path.join(output_dir, "ch_leading_monthly.png") + _save(fig, p) + charts["leading_monthly"] = p + except Exception as e: + log.warning("Chart leading_monthly failed: %s", e) + + # ── 3. Activity type mix (donut) ───────────────────────────────────────── + try: + totals = leading_res.get("totals", {}) + if totals: + labels_d = list(totals.keys()) + vals_d = list(totals.values()) + colours = [ACTIVITY_COLOURS.get(l, MUTED) for l in labels_d] + fig, ax = plt.subplots(figsize=(5, 4)) + wedges, _, autotexts = ax.pie( + vals_d, labels=labels_d, autopct="%1.0f%%", + colors=colours, startangle=140, + wedgeprops={"linewidth": 1, "edgecolor": "white"}, + ) + for at in autotexts: + at.set_fontsize(9) + ax.set_title("Activity Type Mix", fontsize=13, fontweight="bold", color=DEEP_BLUE) + p = os.path.join(output_dir, "ch_activity_mix.png") + _save(fig, p) + charts["activity_mix"] = p + except Exception as e: + log.warning("Chart activity_mix failed: %s", e) + + # ── 4. 
BU comparison: activities vs events ──────────────────────────────── + try: + bu_comp = se_ev_res.get("bu_comparison", []) + if bu_comp: + df_bu = pd.DataFrame(bu_comp).set_index("business_unit") + df_bu = df_bu[["activities", "events"]].sort_values("activities", ascending=True) + y = range(len(df_bu)) + fig, ax = plt.subplots(figsize=(9, max(3, len(df_bu) * 0.6))) + ax.barh([i - 0.2 for i in y], df_bu["activities"].values, + height=0.35, color=DEEP_BLUE, label="Activities") + ax.barh([i + 0.2 for i in y], df_bu["events"].values, + height=0.35, color=RED, label="Events") + ax.set_yticks(list(y)) + ax.set_yticklabels(df_bu.index.tolist(), fontsize=9) + ax.set_title("Activities vs Events by Business Unit", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.legend(fontsize=9) + p = os.path.join(output_dir, "ch_bu_comparison.png") + _save(fig, p) + charts["bu_comparison"] = p + except Exception as e: + log.warning("Chart bu_comparison failed: %s", e) + + # ── 5. Dual-axis: monthly activities and events overlay ─────────────────── + try: + m_acts = se_ev_res.get("monthly_acts", {}) + m_events = se_ev_res.get("monthly_events", {}) + if m_acts and m_events: + all_keys = sorted(set(m_acts) | set(m_events)) + all_p = pd.PeriodIndex([pd.Period(k) for k in all_keys]) + x = range(len(all_p)) + acts_vals = [m_acts.get(k, 0) for k in all_keys] + event_vals = [m_events.get(k, 0) for k in all_keys] + labels_m = _month_labels(all_p) + + fig, ax1 = plt.subplots(figsize=(11, 4)) + ax2 = ax1.twinx() + ax1.bar(x, acts_vals, color=DEEP_BLUE, alpha=0.6, label="Leading Activities", width=0.6) + ax2.plot(x, event_vals, color=RED, linewidth=2, marker="o", markersize=4, label="Events") + ax1.set_xticks(x) + ax1.set_xticklabels(labels_m, rotation=45, ha="right", fontsize=8) + ax1.set_ylabel("Leading Activities", color=DEEP_BLUE) + ax2.set_ylabel("Events", color=RED) + ax1.set_title("Leading Activities vs Events — Monthly Overlay", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + 
lines1, labs1 = ax1.get_legend_handles_labels() + lines2, labs2 = ax2.get_legend_handles_labels() + ax1.legend(lines1 + lines2, labs1 + labs2, loc="upper left", fontsize=9) + ax1.spines["top"].set_visible(False) + ax2.spines["top"].set_visible(False) + p = os.path.join(output_dir, "ch_overlay.png") + _save(fig, p) + charts["overlay"] = p + except Exception as e: + log.warning("Chart overlay failed: %s", e) + + # ── 6. Top LLC topics ───────────────────────────────────────────────────── + try: + top_topics = at_risk_res.get("top_llc_topics", {}) + if top_topics: + items = sorted(top_topics.items(), key=lambda x: x[1])[-12:] + labels_t = [i[0] for i in items] + vals_t = [i[1] for i in items] + fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4))) + bars = ax.barh(labels_t, vals_t, color=DEEP_BLUE, alpha=0.85) + for bar, val in zip(bars, vals_t): + ax.text(val + 0.2, bar.get_y() + bar.get_height() / 2, + str(val), va="center", fontsize=9) + ax.set_title("Top LLC Conversation Topics", fontsize=13, + fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Count") + p = os.path.join(output_dir, "ch_llc_topics.png") + _save(fig, p) + charts["llc_topics"] = p + except Exception as e: + log.warning("Chart llc_topics failed: %s", e) + + # ── 7. 
At-risk theme heatmap (horizontal bar) ───────────────────────────── + try: + combined = at_risk_res.get("combined_themes", {}) + if combined: + items = sorted(combined.items(), key=lambda x: x[1]) + labels_r = [i[0] for i in items] + vals_r = [i[1] for i in items] + max_v = max(vals_r) if vals_r else 1 + colours_r = [ + RED if v >= max_v * 0.7 + else AMBER if v >= max_v * 0.4 + else DARK_GREEN + for v in vals_r + ] + fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4))) + ax.barh(labels_r, vals_r, color=colours_r, alpha=0.9) + ax.set_title("At-Risk Behaviour Themes (Combined Sources)", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Theme frequency (weighted)") + p = os.path.join(output_dir, "ch_at_risk_themes.png") + _save(fig, p) + charts["at_risk_themes"] = p + except Exception as e: + log.warning("Chart at_risk_themes failed: %s", e) + + # ── 8. Events by consequence ────────────────────────────────────────────── + try: + ev_df = events_res.get("_df") + if ev_df is not None and "consequence" in ev_df: + cons_counts = ev_df["consequence"].value_counts().reindex( + CONSEQUENCE_ORDER, fill_value=0 + ) + cons_colors = [DARK_GREEN, AMBER, RED, PURPLE, PURPLE] + fig, ax = plt.subplots(figsize=(7, 3.5)) + bars = ax.bar(cons_counts.index, cons_counts.values, + color=cons_colors[:len(cons_counts)], alpha=0.9) + for bar, val in zip(bars, cons_counts.values): + if val > 0: + ax.text(bar.get_x() + bar.get_width() / 2, val + 0.3, + str(val), ha="center", fontsize=10, fontweight="bold") + ax.set_title("Events by Actual Consequence", fontsize=13, + fontweight="bold", color=DEEP_BLUE) + ax.set_ylabel("Count") + p = os.path.join(output_dir, "ch_consequence.png") + _save(fig, p) + charts["consequence"] = p + except Exception as e: + log.warning("Chart consequence failed: %s", e) + + # ── 9. 
Top leaders (activities) ─────────────────────────────────────────── + try: + top_leaders = leading_res.get("top_leaders", {}) + if top_leaders: + items = sorted(top_leaders.items(), key=lambda x: x[1])[-15:] + fig, ax = plt.subplots(figsize=(8, max(4, len(items) * 0.4))) + ax.barh([i[0] for i in items], [i[1] for i in items], + color=SKY_BLUE, alpha=0.9) + ax.set_title("Top Leaders by LLC Activity Count", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("LLC Count") + p = os.path.join(output_dir, "ch_top_leaders.png") + _save(fig, p) + charts["top_leaders"] = p + except Exception as e: + log.warning("Chart top_leaders failed: %s", e) + + # ── 10. CRP focus areas ─────────────────────────────────────────────────── + try: + crp_focus = leading_res.get("crp_focus", {}) + if crp_focus: + items = sorted(crp_focus.items(), key=lambda x: x[1]) + fig, ax = plt.subplots(figsize=(8, max(3, len(items) * 0.4))) + ax.barh([i[0] for i in items], [i[1] for i in items], + color=MID_GREEN, alpha=0.9) + ax.set_title("CRP Focus Areas in Leader Learning Conversations", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Count") + p = os.path.join(output_dir, "ch_crp_focus.png") + _save(fig, p) + charts["crp_focus"] = p + except Exception as e: + log.warning("Chart crp_focus failed: %s", e) + + # ── 11. 
Two-year quality trend by activity type ────────────────────────── + try: + monthly_quality_rows = trends_res.get("monthly_quality", []) + if monthly_quality_rows: + qdf = pd.DataFrame(monthly_quality_rows) + if not qdf.empty: + periods = pd.PeriodIndex([pd.Period(p, freq="M") for p in qdf["period"]]) + x = range(len(periods)) + fig, ax = plt.subplots(figsize=(11, 4)) + for atype in LEADING_ACTIVITY_TYPES: + if atype in qdf.columns and qdf[atype].notna().any(): + ax.plot( + x, + qdf[atype], + marker="o", + linewidth=2, + markersize=3.5, + label=atype, + color=ACTIVITY_COLOURS.get(atype, DEEP_BLUE), + ) + ax.set_xticks(x) + ax.set_xticklabels(_month_labels(periods), rotation=45, ha="right", fontsize=8) + ax.set_ylim(0, 100) + ax.yaxis.set_major_locator(mticker.MultipleLocator(10)) + ax.set_ylabel("Average quality score") + ax.set_title("Two-Year Quality Trend by Activity Type", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.legend(fontsize=9, loc="upper left") + p = os.path.join(output_dir, "ch_quality_trend.png") + _save(fig, p) + charts["quality_trend"] = p + except Exception as e: + log.warning("Chart quality_trend failed: %s", e) + + # ── 12. 
High-volume / low-value units ──────────────────────────────────── + try: + hvlv = trends_res.get("high_volume_low_value", []) + if hvlv: + df_hv = pd.DataFrame(hvlv[:8]).sort_values("count", ascending=True) + labels = [f"{r['business_unit']} ({r['activity_type']})" for _, r in df_hv.iterrows()] + fig, ax = plt.subplots(figsize=(9, max(3.5, len(df_hv) * 0.5))) + ax.barh(labels, df_hv["count"], color=AMBER, alpha=0.85) + for idx, (_, row) in enumerate(df_hv.iterrows()): + ax.text(row["count"] + 1, idx, f"{row['shallow_pct']:.0f}% shallow", va="center", fontsize=9) + ax.set_title("High-Volume / Low-Value Activity Hotspots", + fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Activity count in two-year window") + p = os.path.join(output_dir, "ch_low_value_units.png") + _save(fig, p) + charts["low_value_units"] = p + except Exception as e: + log.warning("Chart low_value_units failed: %s", e) + + # ── 13. Serious hotspot ranking ────────────────────────────────────────── + try: + serious_projects = events_res.get("serious_projects", {}) + serious_locations = events_res.get("serious_locations", {}) + rows = [] + for label, values in [("Project", serious_projects), ("Location", serious_locations)]: + for name, count in list(values.items())[:5]: + rows.append((f"{name} ({label})", int(count))) + if rows: + rows = sorted(rows, key=lambda x: x[1])[-10:] + labels_h = [r[0] for r in rows] + vals_h = [r[1] for r in rows] + fig, ax = plt.subplots(figsize=(9, max(3.5, len(rows) * 0.45))) + bars = ax.barh(labels_h, vals_h, color=RED, alpha=0.9) + for bar, val in zip(bars, vals_h): + ax.text(val + 0.1, bar.get_y() + bar.get_height() / 2, str(val), va="center", fontsize=9) + ax.set_title("Serious Event Hotspots", fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Moderate / Major / Substantial events") + p = os.path.join(output_dir, "ch_serious_hotspots.png") + _save(fig, p) + charts["serious_hotspots"] = p + except Exception as e: + log.warning("Chart 
serious_hotspots failed: %s", e) + + # ── 14. Project performance quadrant ───────────────────────────────────── + try: + proj_best = se_ev_res.get("project_comparison", {}).get("best", []) + proj_watch = se_ev_res.get("project_comparison", {}).get("watch", []) + project_rows = {} + for row in proj_best + proj_watch: + name = row.get("project") + if name: + project_rows[name] = row + if project_rows: + pdf = pd.DataFrame(project_rows.values()).head(12) + fig, ax = plt.subplots(figsize=(8.5, 6)) + x = pdf["activities"].astype(float) + y = pdf["events"].astype(float) + sizes = 80 + pdf["serious_events"].astype(float) * 28 + colors = [ + DARK_GREEN if (row["events"] <= y.median() and row["serious_events"] <= pdf["serious_events"].median()) else AMBER + if row["serious_events"] <= pdf["serious_events"].median() else RED + for _, row in pdf.iterrows() + ] + ax.scatter(x, y, s=sizes, c=colors, alpha=0.75, edgecolors="white", linewidths=1.2) + ax.axvline(x.median(), color=MUTED, linestyle="--", linewidth=1) + ax.axhline(y.median(), color=MUTED, linestyle="--", linewidth=1) + for _, row in pdf.iterrows(): + ax.text(row["activities"] + 2, row["events"] + 0.2, str(row["project"])[:28], fontsize=8, color=DEEP_BLUE) + ax.set_title("Project Performance Quadrant", fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.set_xlabel("Leading activities") + ax.set_ylabel("Events") + p = os.path.join(output_dir, "ch_project_quadrant.png") + _save(fig, p) + charts["project_quadrant"] = p + except Exception as e: + log.warning("Chart project_quadrant failed: %s", e) + + # ── 15. 
Quality composition by activity type ───────────────────────────── + try: + qrows = trends_res.get("quality_by_type", []) + if qrows: + qdf = pd.DataFrame(qrows) + if not qdf.empty: + mixed = 100 - qdf["high_value_pct"] - qdf["meaningful_pct"] - qdf["shallow_pct"] + fig, ax = plt.subplots(figsize=(8.5, 3.8)) + left = np.zeros(len(qdf)) + segments = [ + ("Shallow", qdf["shallow_pct"].values, RED), + ("Mixed", mixed.values, AMBER), + ("Meaningful", qdf["meaningful_pct"].values, SKY_BLUE), + ("High value", qdf["high_value_pct"].values, DARK_GREEN), + ] + for label, vals, color in segments: + ax.barh(qdf["activity_type"], vals, left=left, color=color, label=label, alpha=0.9) + left += vals + ax.set_xlim(0, 100) + ax.xaxis.set_major_locator(mticker.MultipleLocator(20)) + ax.set_xlabel("Share of records") + ax.set_title("Leading Activity Quality Mix", fontsize=13, fontweight="bold", color=DEEP_BLUE) + ax.legend(loc="lower right", ncol=2, fontsize=8) + p = os.path.join(output_dir, "ch_quality_mix.png") + _save(fig, p) + charts["quality_mix"] = p + except Exception as e: + log.warning("Chart quality_mix failed: %s", e) + + log.info("Generated %d charts in %s", len(charts), output_dir) + return charts + + +# ───────────────────────────────────────────────────────────────────────────── +# Recommendation generation +# ───────────────────────────────────────────────────────────────────────────── + +def _generate_recommendations( + events_res: dict, + leading_res: dict, + effectiveness: dict, + at_risk_res: dict, + focus_areas: dict, + trends_res: dict, + se_events_rel: dict, +) -> list[str]: + """Derive actionable recommendations from analysis results.""" + recs: list[str] = [] + + # Serious events + serious_pct = float(str(events_res.get("serious_pct", "0")).replace("%", "") or 0) + if serious_pct >= 3: + recs.append( + f"Moderate-or-above consequence events make up {serious_pct:.1f}% of recorded events. 
" + "Review whether controls around the highest-consequence scenarios are being verified often enough in field activity." + ) + + # LTI + if events_res.get("lti_count", 0) > 0: + recs.append( + f"{events_res.get('lti_count', 0)} Lost Time Injuries were recorded in the analysis window. " + "Review the underlying work types, contributing factors, and recovery actions for common patterns." + ) + + # Serious-event timing + time_buckets = events_res.get("serious_time_buckets", {}) + if time_buckets: + top_bucket = max(time_buckets, key=time_buckets.get) + recs.append( + f"Serious events are most frequently recorded in {top_bucket}. Use this to target pre-start, supervision, and fatigue controls at the riskiest parts of the day." + ) + + # Motor vehicle + motor = events_res.get("motor_vehicle", {}) + if motor.get("count", 0) > 0: + recs.append( + f"Motor vehicle events account for {motor.get('pct_total', 0):.1f}% of all events. " + "Review journey management, road conditions, and vehicle type patterns in the MVE section." + ) + top_mv_project = next(iter(motor.get("top_projects", {}).items()), None) + top_mv_road = next(iter(motor.get("road_types", {}).items()), None) + if top_mv_project and top_mv_road: + recs.append( + f"Prioritise a motor vehicle risk review for {top_mv_project[0]} where MV exposure is most visible, " + f"with particular attention to {top_mv_road[0]} driving conditions." + ) + + # Activity trend + trend = leading_res.get("activity_trend", "") + if "declining" in trend: + recs.append( + f"Leading activity volumes show a declining trend ({trend}). " + "Leaders should re-engage with LLC, CCC, and OCC completion targets." + ) + + # Declining BUs + for bu in focus_areas.get("declining_bus", []): + recs.append( + f"Business Unit '{bu}' shows declining leading-activity volume in the recent period. " + "Targeted engagement from the sector SHEQ team is recommended." 
+ ) + + # High activity but high events + for bu in effectiveness.get("high_activity_high_events", []): + recs.append( + f"'{bu}' has both high leading-activity and high event volumes. " + "This may indicate reactive activity patterns — review whether conversations " + "are targeting root causes rather than responding after the fact." + ) + + project_watch = se_events_rel.get("project_comparison", {}).get("watch", []) + if project_watch: + top = project_watch[0] + recs.append( + f"Focus the next leadership review on project '{top.get('project')}', which recorded " + f"{top.get('events', 0)} events and {top.get('serious_events', 0)} serious events against " + f"{top.get('activities', 0)} leading activities." + ) + + location_watch = se_events_rel.get("location_comparison", {}).get("watch", []) + if location_watch: + top = location_watch[0] + recs.append( + f"Target field verification and local coaching at location '{top.get('location')}', where " + f"{top.get('serious_events', 0)} serious events have been recorded and the activity-to-event ratio is " + f"{top.get('activity_event_ratio', 'low')}." + ) + + project_best = se_events_rel.get("project_comparison", {}).get("best", []) + if project_best: + best = project_best[0] + recs.append( + f"Review what is working in project '{best.get('project')}', which shows comparatively strong leading-activity coverage " + f"with {best.get('activities', 0)} activities and {best.get('events', 0)} events, and replicate the practice in weaker areas." + ) + + # Gap themes + gap = at_risk_res.get("gap_themes", []) + if gap: + recs.append( + f"The following risk themes appear frequently in events but are under-represented " + f"in LLC conversations: {', '.join(gap)}. " + "Consider incorporating these topics into forthcoming LLC schedules." 
+ ) + + for item in trends_res.get("high_volume_low_value", [])[:3]: + recs.append( + f"{item['business_unit']} has a high-volume / low-value {item['activity_type']} pattern " + f"({item['count']} records, {item['shallow_pct']:.1f}% shallow). Sample the underlying entries " + "with local leaders and tighten expectations for narrative quality and follow-up." + ) + + for item in trends_res.get("recommendations", [])[:2]: + recs.append(item) + + input_depth = trends_res.get("input_depth", {}) + if input_depth.get("correlation") is not None and input_depth["correlation"] >= 0.4: + recs.append( + f"Input depth is moderately aligned with record quality (r = {input_depth['correlation']:.2f}). Track populated-field depth as a simple supporting KPI alongside the richer quality score." + ) + + ccc = trends_res.get("activity_insights", {}).get("CCC", {}) + if ccc: + recs.append( + f"Lift CCC quality expectations in priority areas: current CCC quality averages {ccc.get('avg_quality', 0):.1f}/100 with " + f"{ccc.get('shallow_pct', 0):.1f}% shallow records and only {ccc.get('follow_up_pct', 0):.1f}% showing follow-up signals." + ) + + serious_projects = events_res.get("serious_projects", {}) + if serious_projects: + top_project, top_count = next(iter(serious_projects.items())) + recs.append( + f"Escalate a focused action plan for project '{top_project}', which currently has the highest serious-event burden ({top_count} serious events)." + ) + + # Default if nothing triggered + if not recs: + recs.append( + "No significant adverse trends identified in the current period. " + "Continue current leading activity cadence and monitor monthly." 
+ ) + + deduped: list[str] = [] + seen: set[str] = set() + for rec in recs: + key = rec.strip() + if key and key not in seen: + seen.add(key) + deduped.append(key) + return deduped + + +# ───────────────────────────────────────────────────────────────────────────── +# Main orchestration +# ───────────────────────────────────────────────────────────────────────────── + +def run_full_analysis( + events: pd.DataFrame, + safety_energy: pd.DataFrame, + llc: pd.DataFrame, + start_date: str, + split_date: str, + pd1_name: str, + pd2_name: str, + output_dir: str, +) -> AnalysisResults: + """ + Run the complete SHEQ analysis pipeline across all three data sources. + + Parameters + ---------- + events : normalised Events DataFrame (from data_loader) + safety_energy : normalised Safety Energy DataFrame + llc : normalised LLC Data DataFrame + start_date : ISO date string — analysis window start + split_date : retained for backwards compatibility; ignored by the sector-wide report + pd1_name : retained for backwards compatibility; ignored by the sector-wide report + pd2_name : retained for backwards compatibility; ignored by the sector-wide report + output_dir : directory for chart images and output files + + Returns + ------- + AnalysisResults dataclass + """ + os.makedirs(output_dir, exist_ok=True) + log.info("=== SHEQ Full Analysis ===") + log.info(" start=%s output_dir=%s", start_date, output_dir) + + results = AnalysisResults() + results.params = { + "start_date": start_date, + "output_dir": output_dir, + } + + # 1. Data quality + log.info("[1/7] Data quality profiling...") + results.data_quality = _profile_data_quality(events, safety_energy, llc) + + # 2. Events analysis + log.info("[2/7] Events analysis...") + results.events_summary = _analyse_events( + events, start_date, split_date, pd1_name, pd2_name + ) + + # 3. 
Leading activity analysis + log.info("[3/7] Leading activity analysis...") + results.leading_summary = _analyse_leading(safety_energy, llc, start_date) + + # 4. Effectiveness analysis + log.info("[4/7] Effectiveness analysis...") + results.effectiveness = _analyse_effectiveness( + results.events_summary, results.leading_summary + ) + + # 5. At-risk behaviour analysis + log.info("[5/7] At-risk behaviour extraction...") + results.at_risk = _extract_at_risk_themes(events, safety_energy, llc, start_date) + + # 6. SE ↔ Events relationship + log.info("[6/7] Safety Energy ↔ Events relationship...") + results.se_events_rel = _analyse_se_events_relationship( + events, safety_energy, start_date + ) + + # Rolling two-year Safety Energy trends and quality + results.trends = _analyse_two_year_trends( + safety_energy, llc, events, start_date + ) + + # 7. Focus areas + results.focus_areas = _analyse_focus_areas(events, safety_energy, llc, start_date) + + # Charts + log.info("[7/7] Generating charts...") + results.charts = _generate_charts( + results.events_summary, + results.leading_summary, + results.se_events_rel, + results.at_risk, + results.trends, + output_dir, + pd1_name, + pd2_name, + split_date, + ) + + # Recommendations and caveats + results.recommendations = _generate_recommendations( + results.events_summary, + results.leading_summary, + results.effectiveness, + results.at_risk, + results.focus_areas, + results.trends, + results.se_events_rel, + ) + results.caveats = [ + "All analysis in this report is based on data exported from Ventia's safety management " + "system. Data quality depends on the completeness and accuracy of field entries.", + "Correlation and association findings do not imply causation. They are presented to " + "guide further investigation, not to draw definitive conclusions.", + "Activity counts reflect recorded activities only. 
Under-reporting in any area will " + "affect the reliability of leading-indicator analysis.", + "Theme extraction from free-text fields uses keyword matching and may miss nuance " + "or misclassify entries. Manual review of flagged themes is recommended.", + "Business unit comparisons may be affected by differences in headcount, contract scope, " + "and operational complexity between units.", + ] + + log.info("Analysis complete. %d charts, %d recommendations.", + len(results.charts), len(results.recommendations)) + + # Remove private DataFrames before returning (not needed by report_builder) + for key in ("_df", "_serious", "_motor"): + results.events_summary.pop(key, None) + for key in ("_se_f", "_llc_f"): + results.leading_summary.pop(key, None) + for key in ("_quality_df", "_llc_window"): + results.trends.pop(key, None) + + return results diff --git a/app.py b/app.py new file mode 100644 index 0000000..f3a01fe --- /dev/null +++ b/app.py @@ -0,0 +1,296 @@ +""" +app.py — SHEQ Analysis Tool — Flask web application. + +Run: + python app.py +Then open http://localhost:5000 + +The sidebar has two sections: + 1. Events Explorer — filter and chart Events data interactively. + 2. Generate Report — run the full analysis across Events, Safety Energy, + and LLC Data and download a comprehensive DOCX report. 
+""" + +from __future__ import annotations + +import logging +import os +from datetime import datetime + +import pandas as pd +from flask import Flask, jsonify, render_template, request, send_file + +from config import ( + EVENTS_FILE, LLC_FILE, SAFETY_ENERGY_FILE, + DEFAULT_PD1_NAME, DEFAULT_PD2_NAME, + DEFAULT_START_DATE, DEFAULT_SPLIT_DATE, + OUTPUT_DIR, +) +from data_loader import load_all, get_body_parts, load_and_prepare +from analysis_engine import run_full_analysis +from report_builder import build_report +from ppt_builder import build_presentation + +# ── Logging ────────────────────────────────────────────────────────────────── +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(name)s %(message)s", + datefmt="%H:%M:%S", +) +log = logging.getLogger("app") + +app = Flask(__name__) + +# ── Cached raw DataFrames (loaded on first request) ────────────────────────── +_CACHE: dict[str, pd.DataFrame | None] = { + "events": None, +} + + +def _get_events_df() -> pd.DataFrame: + """Return the raw Events DataFrame, loading from disk on first call.""" + if _CACHE["events"] is None: + log.info("Loading Events from %s", EVENTS_FILE) + df = pd.read_excel(EVENTS_FILE) + # Normalise date column — handle "Monday, 25 March 2024" and ISO formats + date_col = "EventDate" if "EventDate" in df.columns else "Event Date" + df["_date"] = df[date_col].apply(_parse_one_date) + _CACHE["events"] = df + return _CACHE["events"].copy() + + +def _parse_one_date(val) -> pd.Timestamp: + if pd.isna(val): + return pd.NaT + s = str(val).strip() + if "," in s and len(s.split(",")[0].split()) == 1: + s = s.split(",", 1)[1].strip() + try: + return pd.to_datetime(s, dayfirst=True) + except Exception: + return pd.NaT + + +# ───────────────────────────────────────────────────────────────────────────── +# Web UI +# ───────────────────────────────────────────────────────────────────────────── + +@app.route("/") +def index(): + df = _get_events_df() + min_date = 
df["_date"].min().strftime("%Y-%m-%d") + max_date = df["_date"].max().strftime("%Y-%m-%d") + + # Handle both column name variants + evt_col = "EventType" if "EventType" in df.columns else "Event Type" + cons_col = "Actual Consequence" + + event_types = sorted(df[evt_col].dropna().unique().tolist()) + consequences = sorted(df[cons_col].dropna().unique().tolist()) + + return render_template( + "index.html", + min_date=min_date, + max_date=max_date, + event_types=event_types, + consequences=consequences, + total_events=len(df), + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# Events Explorer API +# ───────────────────────────────────────────────────────────────────────────── + +@app.route("/api/filter", methods=["POST"]) +def api_filter(): + """Return filtered summary stats as JSON for the Events Explorer.""" + params = request.json or {} + df = _get_events_df() + + evt_col = "EventType" if "EventType" in df.columns else "Event Type" + cons_col = "Actual Consequence" + crp_col = "CRP Involved" if "CRP Involved" in df.columns else "CRPInvolved" + rc_col = "Root Cause Category" + inj_col = "Ventia Injury Classification" + bp_col = "Bodily Location" + + # Filters + if params.get("start_date"): + df = df[df["_date"] >= pd.Timestamp(params["start_date"])] + if params.get("end_date"): + df = df[df["_date"] <= pd.Timestamp(params["end_date"])] + if params.get("event_types"): + df = df[df[evt_col].isin(params["event_types"])] + if params.get("consequences"): + df = df[df[cons_col].isin(params["consequences"])] + + if len(df) == 0: + return jsonify({"error": "No events match the selected filters.", "total": 0}) + + # Summary stats + evt_counts = df[evt_col].value_counts().to_dict() + cons_counts = df[cons_col].value_counts().to_dict() + + inj_class = ( + df[inj_col].value_counts().to_dict() + if inj_col in df.columns else {} + ) + + dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] + dow = ( + 
df["_date"].dt.day_name().value_counts() + .reindex(dow_order, fill_value=0).to_dict() + ) + + monthly = df.groupby(df["_date"].dt.to_period("M")).size() + monthly_data = {str(k): int(v) for k, v in monthly.items()} + + bp = ( + get_body_parts(df[bp_col]).value_counts().head(10).to_dict() + if bp_col in df.columns else {} + ) + + rc = ( + df[rc_col].value_counts().to_dict() + if rc_col in df.columns else {} + ) + + crp: dict = {} + if crp_col in df.columns: + crp = df[crp_col].value_counts().to_dict() + crp.pop("None Identified", None) + crp.pop("Under Investigation", None) + + # Investigation performance — use available columns + lag_col = next((c for c in ("Days to Investigate", "Event Lag", "Days to Enter") + if c in df.columns), None) + close_col = "Days to Close" if "Days to Close" in df.columns else None + + inv_med = df[lag_col].dropna().median() if lag_col else None + close_med = df[close_col].dropna().median() if close_col else None + + return jsonify({ + "total": len(df), + "date_range": ( + f"{df['_date'].min().strftime('%d %b %Y')} " + f"\u2013 {df['_date'].max().strftime('%d %b %Y')}" + ), + "event_types": evt_counts, + "consequences": cons_counts, + "injury_classification": inj_class, + "day_of_week": dow, + "monthly": monthly_data, + "body_parts": bp, + "root_causes": rc, + "crp": crp, + "median_investigate_days": round(inv_med, 1) if inv_med and pd.notna(inv_med) else None, + "median_close_days": round(close_med, 1) if close_med and pd.notna(close_med) else None, + "closed_pct": round( + (df["Status"] == "Closed").sum() / len(df) * 100, 1 + ) if "Status" in df.columns else None, + }) + + +# ───────────────────────────────────────────────────────────────────────────── +# Comprehensive Report API +# ───────────────────────────────────────────────────────────────────────────── + +@app.route("/api/generate_full_report", methods=["POST"]) +def api_generate_full_report(): + """ + Load all three data sources, run the full analysis pipeline, build the + 
DOCX report, and return it as a file download. + """ + params = request.json or {} + + start_date = params.get("start_date", DEFAULT_START_DATE) + export_format = str(params.get("export_format", "docx")).lower() + if export_format not in {"docx", "pptx"}: + return jsonify({"success": False, "error": "Supported export formats are DOCX and PPTX."}), 400 + + events_path = params.get("events_file", EVENTS_FILE) + se_path = params.get("safety_energy_file", SAFETY_ENERGY_FILE) + llc_path = params.get("llc_file", LLC_FILE) + + run_id = datetime.now().strftime("%Y%m%d_%H%M%S") + run_dir = os.path.join(OUTPUT_DIR, run_id) + + try: + log.info("Starting full report generation (run_id=%s)", run_id) + data = load_all(events_path, se_path, llc_path) + + results = run_full_analysis( + events = data["events"], + safety_energy = data["safety_energy"], + llc = data["llc"], + start_date = start_date, + split_date = DEFAULT_SPLIT_DATE, + pd1_name = DEFAULT_PD1_NAME, + pd2_name = DEFAULT_PD2_NAME, + output_dir = run_dir, + ) + + if export_format == "pptx": + report_path = build_presentation(results, run_dir) + download_name = f"SHEQ_Safety_Performance_{run_id}.pptx" + mimetype = "application/vnd.openxmlformats-officedocument.presentationml.presentation" + else: + report_path = build_report(results, run_dir) + download_name = f"SHEQ_Safety_Performance_{run_id}.docx" + mimetype = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + log.info("Report ready: %s", report_path) + + return send_file( + report_path, + as_attachment=True, + download_name=download_name, + mimetype=mimetype, + ) + + except FileNotFoundError as e: + log.error("File not found: %s", e) + return jsonify({"success": False, "error": str(e)}), 404 + except Exception as e: + log.exception("Report generation failed") + return jsonify({"success": False, "error": str(e)}), 500 + + +# ── Legacy endpoint (kept for backwards compatibility) ──────────────────────── + +@app.route("/api/download_report", 
methods=["POST"]) +def api_download_report(): + """Legacy Events-only PD comparison report (preserved from v1).""" + params = request.json or {} + start_date = params.get("start_date", DEFAULT_START_DATE) + split_date = params.get("split_date", DEFAULT_SPLIT_DATE) + pd1_name = params.get("pd1_name", DEFAULT_PD1_NAME) + pd2_name = params.get("pd2_name", DEFAULT_PD2_NAME) + + run_dir = os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S")) + + try: + from analysis import run_analysis + docx_path = run_analysis( + EVENTS_FILE, start_date, split_date, pd1_name, pd2_name, run_dir + ) + return send_file( + docx_path, as_attachment=True, + download_name="SHEQ_PD_Comparison.docx", + ) + except Exception as e: + log.exception("Legacy report generation failed") + return jsonify({"success": False, "error": str(e)}), 500 + + +# ───────────────────────────────────────────────────────────────────────────── +# Entry point +# ───────────────────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + os.makedirs(OUTPUT_DIR, exist_ok=True) + log.info("SHEQ Analysis Tool starting on http://localhost:5000") + log.info(" Events: %s", EVENTS_FILE) + log.info(" Safety Energy: %s", SAFETY_ENERGY_FILE) + log.info(" LLC Data: %s", LLC_FILE) + app.run(debug=True, port=5000) diff --git a/config.py b/config.py new file mode 100644 index 0000000..639b357 --- /dev/null +++ b/config.py @@ -0,0 +1,175 @@ +""" +config.py — Central configuration for the SHEQ Analysis Tool. + +Holds file paths, column name mappings, activity type definitions, +severity orders, and brand constants. Edit this file when source +column names change; do not touch the analysis or report modules. 
+""" + +from __future__ import annotations + +import os + +# ── Default file paths (resolved relative to this file's directory) ────────── +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +EVENTS_FILE = os.environ.get("SHEQ_EVENTS_FILE", os.path.join(BASE_DIR, "Events.xlsx")) +LLC_FILE = os.environ.get("SHEQ_LLC_FILE", os.path.join(BASE_DIR, "LLC_Data.xlsx")) +SAFETY_ENERGY_FILE = os.environ.get("SHEQ_SE_FILE", os.path.join(BASE_DIR, "Safety_Energy.xlsx")) +OUTPUT_DIR = os.environ.get("SHEQ_OUTPUT_DIR", os.path.join(BASE_DIR, "output")) + + +# ── Events.xlsx column mapping ───────────────────────────────────────────────── +# Maps a normalised internal name → list of candidate column names in order +# of preference. data_loader picks the first match it finds. +EVENTS_COL_MAP: dict[str, list[str]] = { + "date": ["EventDate", "Event Date", "Date"], + "event_type": ["EventType", "Event Type"], + "consequence": ["Actual Consequence"], + "potential": ["Potential Consequence"], + "status": ["Status"], + "business_unit": ["Business Unit"], + "project": ["Project"], + "location": ["Location", "Location.1"], + "crp": ["CRP Involved", "CRPInvolved"], + "root_cause_cat": ["Root Cause Category"], + "root_cause_sub": ["Root Cause Sub-Category"], + "injury_class": ["Ventia Injury Classification"], + "body_part": ["Bodily Location"], + "brief_desc": ["Brief Description"], + "event_desc": ["Event Description"], + "days_to_enter": ["Days to Enter"], + "event_lag": ["Event Lag"], + "report_lag": ["Report Lag"], + "investigation_done":["Investigation Completed"], + "hipo": ["HiPo"], + "critical_event": ["Critical Event"], +} + +# ── Safety_Energy.xlsx column mapping ───────────────────────────────────────── +SE_COL_MAP: dict[str, list[str]] = { + "date": ["EventDate", "Date Conducted", "CompletedDate"], + "module_name": ["ModuleName"], + "module_prefix": ["ModulePrefix"], + "module_type": ["ModuleType"], + "leader": ["CompletedByName", "Conducted By"], + "business_unit": 
["Business Unit"], + "project": ["Project"], + "location": ["Location", "Specific Location"], + "shift": ["Shift"], + "at_risk_aspects":["At Risk Aspects"], + "total_questions":["Total Questions"], + "actions": ["Actions"], + "atl_actions": ["ATL Actions"], + "at_risk_crp": ["At risk CRP"], + "llc_topic": ["LLC Topic"], + "at_risk_obs": ["At risk situation/observation"], + "positive_obs": ["Positive Observation"], + "find_fix": ["Find & Fix", "Find&Fix"], + "participants": ["Number of people spoken to", "Participants"], + "time_spent": ["Time Spent on LLC"], +} + +# ── LLC_Data.xlsx column mapping ─────────────────────────────────────────────── +LLC_COL_MAP: dict[str, list[str]] = { + "date": ["EventDate", "Date Conducted", "Date"], + "topic": ["LLC Topic"], + "leader": ["Conducted by"], + "business_unit": ["Business Unit"], + "project": ["Project"], + "location": ["Location", "Specific Location"], + "crp_focus": ["CRP in Focus"], + "at_risk_obs": ["At risk situation/observation"], + "positive_obs": ["Positive Observation"], + "at_risk_flag": ["At risk work practices observed"], + "participants": ["Participants"], + "find_fix": ["Find&Fix", "Find & Fix"], + "review_action": ["Review & Action"], + "shift": ["Shift"], +} + + +# ── Activity type normalisation ──────────────────────────────────────────────── +# Safety_Energy ModuleType values → display label +MODULE_TYPE_LABELS: dict[str, str] = { + "Leader Learning Conversation": "LLC", + "Critical Control Check": "CCC", + "Operational Control Check": "OCC", +} + +# Canonical leading-activity types used throughout the report +LEADING_ACTIVITY_TYPES = ["LLC", "CCC", "OCC"] + +# NOTE on duplicate "OCC" label: +# In some legacy notes and older exports the label "OCC" appeared for items +# that are now split into "CCC" (Critical Control Check) and "OCC" +# (Operational Control Check). In the current Safety_Energy export both +# CCC and OCC are already correctly separated via ModuleType. 
The LLC_Data +# export contains only LLC-type records. No manual deduplication is +# required; however we collapse all three under "Safety Energy" when +# computing the combined domain total. + + +# ── Consequence severity ordering (low → high) ──────────────────────────────── +CONSEQUENCE_ORDER = ["Negligible", "Minor", "Moderate", "Major", "Substantial"] +CONSEQUENCE_SERIOUS = {"Moderate", "Major", "Substantial"} + +# ── Brand colours (hex) per DESIGN.md ───────────────────────────────────────── +DEEP_BLUE = "#0b3254" +SKY_BLUE = "#13b5ea" +DARK_GREEN = "#006e47" +MID_GREEN = "#009946" +LIGHT_GREEN = "#7bc143" +PURPLE = "#96358d" +AMBER = "#d97706" +RED = "#dc2626" +MUTED = "#64748b" +CARD_BG = "#f0f5fa" +PAGE_BG = "#f8fafc" +BORDER = "#e2e8f0" + +CHART_PALETTE = [DEEP_BLUE, SKY_BLUE, DARK_GREEN, MID_GREEN, + LIGHT_GREEN, PURPLE, AMBER, RED] + +# Activity type → colour mapping for charts +ACTIVITY_COLOURS: dict[str, str] = { + "LLC": DEEP_BLUE, + "CCC": SKY_BLUE, + "OCC": DARK_GREEN, +} + +# ── Report defaults ──────────────────────────────────────────────────────────── +DEFAULT_START_DATE = "2024-01-01" +DEFAULT_SPLIT_DATE = "2025-04-01" +DEFAULT_PD1_NAME = "Matthew Arthur" +DEFAULT_PD2_NAME = "Manga" + +# Minimum activity count for a leader to be included in focus tables +LEADER_MIN_ACTIVITIES = 5 + +# Correlation: minimum month-count required before reporting a correlation +CORR_MIN_MONTHS = 4 + +# Rolling window used for deeper Safety Energy trend analysis +TWO_YEAR_WINDOW_MONTHS = 24 + +# Quality scoring bands for leading-activity records +QUALITY_SCORE_BANDS = { + "high_value": 70, + "meaningful": 55, + "shallow": 35, +} + +# Keyword groups for at-risk theme extraction from free-text fields +AT_RISK_KEYWORDS: dict[str, list[str]] = { + "Manual Handling": ["manual handling", "lifting", "carrying", "musculoskeletal", "msd"], + "Working at Height": ["height", "ladder", "scaffold", "fall", "elevated"], + "Traffic/MVA": ["vehicle", "traffic", "driving", 
"reversing", "motor", "mva", "collision"], + "Hazardous Energy": ["energy", "electrical", "isolation", "loto", "stored energy", "pressure"], + "Slips/Trips/Falls": ["slip", "trip", "fall", "housekeeping", "wet floor", "uneven"], + "PPE": ["ppe", "personal protective", "helmet", "harness", "gloves", "safety glasses"], + "Fatigue": ["fatigue", "tired", "hours", "shift length", "rest"], + "Communication": ["communication", "briefing", "toolbox", "handover", "instruction"], + "Supervision": ["supervision", "supervision", "oversight", "leadership", "monitoring"], + "CRP Compliance": ["crp", "critical risk", "permit", "isolation", "confined space", "work at height"], +} diff --git a/data_loader.py b/data_loader.py new file mode 100644 index 0000000..7002e15 --- /dev/null +++ b/data_loader.py @@ -0,0 +1,378 @@ +""" +data_loader.py — Load and normalise the three SHEQ data sources. + +Each loader returns a pandas DataFrame with normalised column names +(defined in config.py) so that downstream analysis code is insulated +from changes to the source file schema. 
+ +Public API +---------- +load_events(filepath) -> pd.DataFrame +load_safety_energy(filepath) -> pd.DataFrame +load_llc_data(filepath) -> pd.DataFrame +load_all(events_path, se_path, llc_path) -> dict[str, pd.DataFrame] +""" + +from __future__ import annotations + +import logging +import warnings +from pathlib import Path +from typing import Optional + +import pandas as pd + +from config import ( + EVENTS_COL_MAP, + SE_COL_MAP, + LLC_COL_MAP, + MODULE_TYPE_LABELS, + EVENTS_FILE, + SAFETY_ENERGY_FILE, + LLC_FILE, +) + +log = logging.getLogger(__name__) + +# Suppress openpyxl "no default style" warnings +warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl") + + +# ───────────────────────────────────────────────────────────────────────────── +# Internal helpers +# ───────────────────────────────────────────────────────────────────────────── + +def _resolve_col(df: pd.DataFrame, candidates: list[str], key: str) -> Optional[str]: + """Return the first candidate column that exists in df, or None.""" + for c in candidates: + if c in df.columns: + return c + log.debug("Column key '%s' not found (tried: %s)", key, candidates) + return None + + +def _parse_dates(series: pd.Series) -> pd.Series: + """ + Parse a date series that may contain: + - ISO strings "2024-01-15" + - Long-form strings "Monday, 15 January 2024" + - Excel datetime objects + Returns a tz-naive datetime64 series; unparseable values become NaT. 
+ """ + if pd.api.types.is_datetime64_any_dtype(series): + return series.dt.tz_localize(None) if series.dt.tz is not None else series + + def _parse_one(val): + if pd.isna(val): + return pd.NaT + s = str(val).strip() + # Strip leading day-of-week "Monday, " prefix from long-form dates + if "," in s and len(s.split(",")[0].split()) == 1: + s = s.split(",", 1)[1].strip() + try: + return pd.to_datetime(s, dayfirst=True) + except Exception: + return pd.NaT + + return series.map(_parse_one) + + +def _remap(df: pd.DataFrame, col_map: dict[str, list[str]]) -> pd.DataFrame: + """ + Build a new DataFrame with normalised column names. + + For each key in col_map, find the first matching source column and + rename it. Columns not mentioned in col_map are dropped. The + original source columns are preserved under their original names as + well, allowing callers to access additional fields if needed. + """ + # Keep all original columns; add normalised aliases + result = df.copy() + for norm_name, candidates in col_map.items(): + src = _resolve_col(df, candidates, norm_name) + if src is not None and norm_name not in df.columns: + result[norm_name] = df[src] + elif src is not None: + result[norm_name] = df[src] + return result + + +def _null_rate(series: pd.Series) -> float: + """Return fraction of null / empty values (0–1).""" + return series.isna().mean() + + +def _profile(df: pd.DataFrame, label: str) -> dict: + """Return a simple quality profile dict for logging.""" + return { + "source": label, + "rows": len(df), + "cols": len(df.columns), + "date_nulls": _null_rate(df.get("date", pd.Series(dtype="object"))), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Events loader +# ───────────────────────────────────────────────────────────────────────────── + +def load_events(filepath: str = EVENTS_FILE) -> pd.DataFrame: + """ + Load Events.xlsx and return a normalised DataFrame. 
+ + Normalised columns (see EVENTS_COL_MAP): + date, event_type, consequence, status, business_unit, project, + location, crp, root_cause_cat, root_cause_sub, injury_class, + body_part, brief_desc, event_desc, days_to_enter, event_lag, + report_lag, investigation_done, hipo, critical_event + + Also adds: + year, month, year_month (Period[M]) + """ + path = Path(filepath) + if not path.exists(): + raise FileNotFoundError(f"Events file not found: {filepath}") + + log.info("Loading Events from %s", filepath) + raw = pd.read_excel(filepath) + log.info(" Raw shape: %s rows × %s cols", *raw.shape) + + df = _remap(raw, EVENTS_COL_MAP) + + # Parse dates + df["date"] = _parse_dates(df["date"]) + + # Drop rows with no date + n_before = len(df) + df = df.dropna(subset=["date"]).copy() + if len(df) < n_before: + log.warning(" Dropped %d rows with missing date", n_before - len(df)) + + # Derived time fields + df["year"] = df["date"].dt.year + df["month"] = df["date"].dt.month + df["year_month"] = df["date"].dt.to_period("M") + df["dow"] = df["date"].dt.day_name() + + # Normalise text fields + for col in ("event_type", "consequence", "business_unit", "project", + "root_cause_cat", "injury_class"): + if col in df.columns: + df[col] = df[col].astype(str).str.strip() + df[col] = df[col].replace({"nan": pd.NA, "None": pd.NA, "": pd.NA}) + + profile = _profile(df, "Events") + log.info(" Loaded %d events | BUs: %s", + profile["rows"], + list(df["business_unit"].dropna().unique()) if "business_unit" in df else "?") + + return df + + +# ───────────────────────────────────────────────────────────────────────────── +# Safety Energy loader +# ───────────────────────────────────────────────────────────────────────────── + +def load_safety_energy(filepath: str = SAFETY_ENERGY_FILE) -> pd.DataFrame: + """ + Load Safety_Energy.xlsx and return a normalised DataFrame. 
+ + Safety Energy is the combined analytical domain covering all leading + activity types: LLC (Leader Learning Conversations), CCC (Critical + Control Checks), and OCC (Operational Control Checks). + + Normalised columns (see SE_COL_MAP): + date, module_name, module_prefix, module_type, activity_type + (short label: LLC/CCC/OCC), leader, business_unit, project, + location, at_risk_aspects, total_questions, actions, atl_actions, + at_risk_crp, llc_topic, at_risk_obs, positive_obs, participants + + Also adds: + year, month, year_month (Period[M]) + activity_type — shortened label from MODULE_TYPE_LABELS + """ + path = Path(filepath) + if not path.exists(): + raise FileNotFoundError(f"Safety Energy file not found: {filepath}") + + log.info("Loading Safety Energy from %s", filepath) + raw = pd.read_excel(filepath) + log.info(" Raw shape: %s rows × %s cols", *raw.shape) + + df = _remap(raw, SE_COL_MAP) + df["date"] = _parse_dates(df["date"]) + + n_before = len(df) + df = df.dropna(subset=["date"]).copy() + if len(df) < n_before: + log.warning(" Dropped %d rows with missing date", n_before - len(df)) + + # Shorten module_type to LLC / CCC / OCC label + df["activity_type"] = ( + df["module_type"] + .map(MODULE_TYPE_LABELS) + .fillna(df.get("module_type", pd.Series(dtype="str"))) + ) + + # Derived time fields + df["year"] = df["date"].dt.year + df["month"] = df["date"].dt.month + df["year_month"] = df["date"].dt.to_period("M") + + # Normalise text + for col in ("business_unit", "project", "leader", "activity_type"): + if col in df.columns: + df[col] = df[col].astype(str).str.strip() + df[col] = df[col].replace({"nan": pd.NA, "None": pd.NA, "": pd.NA}) + + # Numeric fields — coerce to numeric safely + for col in ("at_risk_aspects", "total_questions", "actions", "atl_actions"): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors="coerce") + + log.info(" Loaded %d activities | types: %s", + len(df), + df["activity_type"].value_counts().to_dict() if 
"activity_type" in df else "?") + + return df + + +# ───────────────────────────────────────────────────────────────────────────── +# LLC Data loader +# ───────────────────────────────────────────────────────────────────────────── + +def load_llc_data(filepath: str = LLC_FILE) -> pd.DataFrame: + """ + Load LLC_Data.xlsx and return a normalised DataFrame. + + LLC_Data is a supplementary export of Leader Learning Conversations, + often containing richer free-text fields (topic, at-risk observations, + review & action notes) than the Safety_Energy export. + + Normalised columns (see LLC_COL_MAP): + date, topic, leader, business_unit, project, location, + crp_focus, at_risk_obs, positive_obs, at_risk_flag, participants + + Also adds: + year, month, year_month (Period[M]) + """ + path = Path(filepath) + if not path.exists(): + raise FileNotFoundError(f"LLC Data file not found: {filepath}") + + log.info("Loading LLC Data from %s", filepath) + raw = pd.read_excel(filepath) + log.info(" Raw shape: %s rows × %s cols", *raw.shape) + + df = _remap(raw, LLC_COL_MAP) + df["date"] = _parse_dates(df["date"]) + + n_before = len(df) + df = df.dropna(subset=["date"]).copy() + if len(df) < n_before: + log.warning(" Dropped %d rows with missing date", n_before - len(df)) + + df["year"] = df["date"].dt.year + df["month"] = df["date"].dt.month + df["year_month"] = df["date"].dt.to_period("M") + + for col in ("business_unit", "project", "leader", "topic", "crp_focus"): + if col in df.columns: + df[col] = df[col].astype(str).str.strip() + df[col] = df[col].replace({"nan": pd.NA, "None": pd.NA, "": pd.NA}) + + # at_risk_flag is a count field in this export + if "at_risk_flag" in df.columns: + df["at_risk_flag"] = pd.to_numeric(df["at_risk_flag"], errors="coerce") + + log.info(" Loaded %d LLC records | BUs: %s", + len(df), + list(df["business_unit"].dropna().unique()) if "business_unit" in df else "?") + + return df + + +# 
───────────────────────────────────────────────────────────────────────────── +# Combined loader +# ───────────────────────────────────────────────────────────────────────────── + +def load_all( + events_path: str = EVENTS_FILE, + se_path: str = SAFETY_ENERGY_FILE, + llc_path: str = LLC_FILE, +) -> dict[str, pd.DataFrame]: + """ + Load all three data sources and return a dict with keys: + 'events' -> normalised Events DataFrame + 'safety_energy' -> normalised Safety Energy DataFrame + 'llc' -> normalised LLC Data DataFrame + + Raises FileNotFoundError with a descriptive message if any file + is missing. + """ + return { + "events": load_events(events_path), + "safety_energy": load_safety_energy(se_path), + "llc": load_llc_data(llc_path), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Backwards-compatibility shim for old analysis.py +# ───────────────────────────────────────────────────────────────────────────── + +def load_and_prepare(filepath: str, start_date: str, split_date: str) -> pd.DataFrame: + """ + Backwards-compatible wrapper used by the old analysis.py module. + + Returns Events data filtered to start_date onwards, with a 'PD' + column (pd1 / pd2) based on split_date. 
+ """ + df = load_events(filepath) + + # Rename normalised columns back to legacy names for old analysis.py + rename_map = { + "date": "Event Date", + "event_type": "Event Type", + "consequence": "Actual Consequence", + "crp": "CRPInvolved", + "root_cause_cat":"Root Cause Category", + "injury_class": "Ventia Injury Classification", + "body_part": "Bodily Location", + } + df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}) + + # Handle missing columns that old code expects + if "Days to Investigate" not in df.columns: + df["Days to Investigate"] = df.get("event_lag", pd.Series(dtype="float64")) + if "Days to Close" not in df.columns: + df["Days to Close"] = pd.to_numeric( + pd.to_datetime(df.get("ClosedAtDate"), errors="coerce") + .sub(df["Event Date"]) + .dt.days, + errors="coerce", + ) + if "CRPInvolved" not in df.columns: + df["CRPInvolved"] = df.get("CRP Involved", pd.NA) + + df = df[df["Event Date"] >= pd.Timestamp(start_date)].copy() + df["Year"] = df["Event Date"].dt.year + df["Month"] = df["Event Date"].dt.month + df["MonthName"] = df["Event Date"].dt.strftime("%b") + df["DOW"] = df["Event Date"].dt.day_name() + df["YearMonth"] = df["Event Date"].dt.to_period("M") + df["PD"] = df["Event Date"].apply( + lambda x: "pd1" if x < pd.Timestamp(split_date) else "pd2" + ) + return df + + +def get_body_parts(series: pd.Series) -> pd.Series: + """Split multi-value body part entries and normalise (legacy helper).""" + parts = [] + for val in series.dropna(): + for part in str(val).split(","): + part = part.strip() + if part and "unspecified" not in part.lower(): + parts.append(part) + return pd.Series(parts) diff --git a/output/20260409_140022/SHEQ_PD_Comparison.docx b/output/20260409_140022/SHEQ_PD_Comparison.docx new file mode 100644 index 0000000..22d465a Binary files /dev/null and b/output/20260409_140022/SHEQ_PD_Comparison.docx differ diff --git a/output/20260409_140022/body_parts.png b/output/20260409_140022/body_parts.png new file 
mode 100644 index 0000000..200e24f Binary files /dev/null and b/output/20260409_140022/body_parts.png differ diff --git a/output/20260409_140022/consequence_by_pd.png b/output/20260409_140022/consequence_by_pd.png new file mode 100644 index 0000000..d1d5df0 Binary files /dev/null and b/output/20260409_140022/consequence_by_pd.png differ diff --git a/output/20260409_140022/crp_by_pd.png b/output/20260409_140022/crp_by_pd.png new file mode 100644 index 0000000..49e270a Binary files /dev/null and b/output/20260409_140022/crp_by_pd.png differ diff --git a/output/20260409_140022/dow_by_pd.png b/output/20260409_140022/dow_by_pd.png new file mode 100644 index 0000000..e391bb8 Binary files /dev/null and b/output/20260409_140022/dow_by_pd.png differ diff --git a/output/20260409_140022/event_type_by_pd.png b/output/20260409_140022/event_type_by_pd.png new file mode 100644 index 0000000..ec6436a Binary files /dev/null and b/output/20260409_140022/event_type_by_pd.png differ diff --git a/output/20260409_140022/monthly_by_pd.png b/output/20260409_140022/monthly_by_pd.png new file mode 100644 index 0000000..ccabf0f Binary files /dev/null and b/output/20260409_140022/monthly_by_pd.png differ diff --git a/output/20260409_140022/rootcause_by_pd.png b/output/20260409_140022/rootcause_by_pd.png new file mode 100644 index 0000000..156b199 Binary files /dev/null and b/output/20260409_140022/rootcause_by_pd.png differ diff --git a/output/20260409_143911/SHEQ_Safety_Performance_20260409_1439.docx b/output/20260409_143911/SHEQ_Safety_Performance_20260409_1439.docx new file mode 100644 index 0000000..bd1762b Binary files /dev/null and b/output/20260409_143911/SHEQ_Safety_Performance_20260409_1439.docx differ diff --git a/output/20260409_143911/ch_activity_mix.png b/output/20260409_143911/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_143911/ch_activity_mix.png differ diff --git a/output/20260409_143911/ch_at_risk_themes.png 
b/output/20260409_143911/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_143911/ch_at_risk_themes.png differ diff --git a/output/20260409_143911/ch_bu_comparison.png b/output/20260409_143911/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_143911/ch_bu_comparison.png differ diff --git a/output/20260409_143911/ch_consequence.png b/output/20260409_143911/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_143911/ch_consequence.png differ diff --git a/output/20260409_143911/ch_crp_focus.png b/output/20260409_143911/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_143911/ch_crp_focus.png differ diff --git a/output/20260409_143911/ch_events_monthly.png b/output/20260409_143911/ch_events_monthly.png new file mode 100644 index 0000000..0d16ed1 Binary files /dev/null and b/output/20260409_143911/ch_events_monthly.png differ diff --git a/output/20260409_143911/ch_leading_monthly.png b/output/20260409_143911/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_143911/ch_leading_monthly.png differ diff --git a/output/20260409_143911/ch_llc_topics.png b/output/20260409_143911/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_143911/ch_llc_topics.png differ diff --git a/output/20260409_143911/ch_overlay.png b/output/20260409_143911/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_143911/ch_overlay.png differ diff --git a/output/20260409_143911/ch_top_leaders.png b/output/20260409_143911/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_143911/ch_top_leaders.png differ diff --git 
a/output/20260409_145534/SHEQ_Safety_Performance_20260409_1456.docx b/output/20260409_145534/SHEQ_Safety_Performance_20260409_1456.docx new file mode 100644 index 0000000..81fa20b Binary files /dev/null and b/output/20260409_145534/SHEQ_Safety_Performance_20260409_1456.docx differ diff --git a/output/20260409_145534/ch_activity_mix.png b/output/20260409_145534/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_145534/ch_activity_mix.png differ diff --git a/output/20260409_145534/ch_at_risk_themes.png b/output/20260409_145534/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_145534/ch_at_risk_themes.png differ diff --git a/output/20260409_145534/ch_bu_comparison.png b/output/20260409_145534/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_145534/ch_bu_comparison.png differ diff --git a/output/20260409_145534/ch_consequence.png b/output/20260409_145534/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_145534/ch_consequence.png differ diff --git a/output/20260409_145534/ch_crp_focus.png b/output/20260409_145534/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_145534/ch_crp_focus.png differ diff --git a/output/20260409_145534/ch_events_monthly.png b/output/20260409_145534/ch_events_monthly.png new file mode 100644 index 0000000..0d16ed1 Binary files /dev/null and b/output/20260409_145534/ch_events_monthly.png differ diff --git a/output/20260409_145534/ch_leading_monthly.png b/output/20260409_145534/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_145534/ch_leading_monthly.png differ diff --git a/output/20260409_145534/ch_llc_topics.png b/output/20260409_145534/ch_llc_topics.png new file mode 100644 index 
0000000..df94bee Binary files /dev/null and b/output/20260409_145534/ch_llc_topics.png differ diff --git a/output/20260409_145534/ch_low_value_units.png b/output/20260409_145534/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_145534/ch_low_value_units.png differ diff --git a/output/20260409_145534/ch_overlay.png b/output/20260409_145534/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_145534/ch_overlay.png differ diff --git a/output/20260409_145534/ch_quality_trend.png b/output/20260409_145534/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_145534/ch_quality_trend.png differ diff --git a/output/20260409_145534/ch_top_leaders.png b/output/20260409_145534/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_145534/ch_top_leaders.png differ diff --git a/output/20260409_151656/SHEQ_Safety_Performance_20260409_1517.docx b/output/20260409_151656/SHEQ_Safety_Performance_20260409_1517.docx new file mode 100644 index 0000000..7f3dc34 Binary files /dev/null and b/output/20260409_151656/SHEQ_Safety_Performance_20260409_1517.docx differ diff --git a/output/20260409_151656/ch_activity_mix.png b/output/20260409_151656/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_151656/ch_activity_mix.png differ diff --git a/output/20260409_151656/ch_at_risk_themes.png b/output/20260409_151656/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_151656/ch_at_risk_themes.png differ diff --git a/output/20260409_151656/ch_bu_comparison.png b/output/20260409_151656/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_151656/ch_bu_comparison.png differ diff --git 
a/output/20260409_151656/ch_consequence.png b/output/20260409_151656/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_151656/ch_consequence.png differ diff --git a/output/20260409_151656/ch_crp_focus.png b/output/20260409_151656/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_151656/ch_crp_focus.png differ diff --git a/output/20260409_151656/ch_events_monthly.png b/output/20260409_151656/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_151656/ch_events_monthly.png differ diff --git a/output/20260409_151656/ch_leading_monthly.png b/output/20260409_151656/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_151656/ch_leading_monthly.png differ diff --git a/output/20260409_151656/ch_llc_topics.png b/output/20260409_151656/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_151656/ch_llc_topics.png differ diff --git a/output/20260409_151656/ch_low_value_units.png b/output/20260409_151656/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_151656/ch_low_value_units.png differ diff --git a/output/20260409_151656/ch_overlay.png b/output/20260409_151656/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_151656/ch_overlay.png differ diff --git a/output/20260409_151656/ch_quality_trend.png b/output/20260409_151656/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_151656/ch_quality_trend.png differ diff --git a/output/20260409_151656/ch_top_leaders.png b/output/20260409_151656/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_151656/ch_top_leaders.png differ 
diff --git a/output/20260409_153855/SHEQ_Safety_Performance_20260409_1539.docx b/output/20260409_153855/SHEQ_Safety_Performance_20260409_1539.docx new file mode 100644 index 0000000..4d407f2 Binary files /dev/null and b/output/20260409_153855/SHEQ_Safety_Performance_20260409_1539.docx differ diff --git a/output/20260409_153855/ch_activity_mix.png b/output/20260409_153855/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_153855/ch_activity_mix.png differ diff --git a/output/20260409_153855/ch_at_risk_themes.png b/output/20260409_153855/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_153855/ch_at_risk_themes.png differ diff --git a/output/20260409_153855/ch_bu_comparison.png b/output/20260409_153855/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_153855/ch_bu_comparison.png differ diff --git a/output/20260409_153855/ch_consequence.png b/output/20260409_153855/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_153855/ch_consequence.png differ diff --git a/output/20260409_153855/ch_crp_focus.png b/output/20260409_153855/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_153855/ch_crp_focus.png differ diff --git a/output/20260409_153855/ch_events_monthly.png b/output/20260409_153855/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_153855/ch_events_monthly.png differ diff --git a/output/20260409_153855/ch_leading_monthly.png b/output/20260409_153855/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_153855/ch_leading_monthly.png differ diff --git a/output/20260409_153855/ch_llc_topics.png b/output/20260409_153855/ch_llc_topics.png new file mode 100644 index 
0000000..df94bee Binary files /dev/null and b/output/20260409_153855/ch_llc_topics.png differ diff --git a/output/20260409_153855/ch_low_value_units.png b/output/20260409_153855/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_153855/ch_low_value_units.png differ diff --git a/output/20260409_153855/ch_overlay.png b/output/20260409_153855/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_153855/ch_overlay.png differ diff --git a/output/20260409_153855/ch_quality_trend.png b/output/20260409_153855/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_153855/ch_quality_trend.png differ diff --git a/output/20260409_153855/ch_top_leaders.png b/output/20260409_153855/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_153855/ch_top_leaders.png differ diff --git a/output/20260409_154738/SHEQ_Safety_Performance_20260409_1548.docx b/output/20260409_154738/SHEQ_Safety_Performance_20260409_1548.docx new file mode 100644 index 0000000..fa96bc4 Binary files /dev/null and b/output/20260409_154738/SHEQ_Safety_Performance_20260409_1548.docx differ diff --git a/output/20260409_154738/ch_activity_mix.png b/output/20260409_154738/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_154738/ch_activity_mix.png differ diff --git a/output/20260409_154738/ch_at_risk_themes.png b/output/20260409_154738/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_154738/ch_at_risk_themes.png differ diff --git a/output/20260409_154738/ch_bu_comparison.png b/output/20260409_154738/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_154738/ch_bu_comparison.png differ diff --git 
a/output/20260409_154738/ch_consequence.png b/output/20260409_154738/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_154738/ch_consequence.png differ diff --git a/output/20260409_154738/ch_crp_focus.png b/output/20260409_154738/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_154738/ch_crp_focus.png differ diff --git a/output/20260409_154738/ch_events_monthly.png b/output/20260409_154738/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_154738/ch_events_monthly.png differ diff --git a/output/20260409_154738/ch_leading_monthly.png b/output/20260409_154738/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_154738/ch_leading_monthly.png differ diff --git a/output/20260409_154738/ch_llc_topics.png b/output/20260409_154738/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_154738/ch_llc_topics.png differ diff --git a/output/20260409_154738/ch_low_value_units.png b/output/20260409_154738/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_154738/ch_low_value_units.png differ diff --git a/output/20260409_154738/ch_overlay.png b/output/20260409_154738/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_154738/ch_overlay.png differ diff --git a/output/20260409_154738/ch_quality_mix.png b/output/20260409_154738/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_154738/ch_quality_mix.png differ diff --git a/output/20260409_154738/ch_quality_trend.png b/output/20260409_154738/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_154738/ch_quality_trend.png differ 
diff --git a/output/20260409_154738/ch_serious_hotspots.png b/output/20260409_154738/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_154738/ch_serious_hotspots.png differ diff --git a/output/20260409_154738/ch_top_leaders.png b/output/20260409_154738/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_154738/ch_top_leaders.png differ diff --git a/output/20260409_160418/SHEQ_Safety_Performance_20260409_1604.docx b/output/20260409_160418/SHEQ_Safety_Performance_20260409_1604.docx new file mode 100644 index 0000000..17726e6 Binary files /dev/null and b/output/20260409_160418/SHEQ_Safety_Performance_20260409_1604.docx differ diff --git a/output/20260409_160418/ch_activity_mix.png b/output/20260409_160418/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_160418/ch_activity_mix.png differ diff --git a/output/20260409_160418/ch_at_risk_themes.png b/output/20260409_160418/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_160418/ch_at_risk_themes.png differ diff --git a/output/20260409_160418/ch_bu_comparison.png b/output/20260409_160418/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_160418/ch_bu_comparison.png differ diff --git a/output/20260409_160418/ch_consequence.png b/output/20260409_160418/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_160418/ch_consequence.png differ diff --git a/output/20260409_160418/ch_crp_focus.png b/output/20260409_160418/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_160418/ch_crp_focus.png differ diff --git a/output/20260409_160418/ch_events_monthly.png b/output/20260409_160418/ch_events_monthly.png new file mode 100644 index 
0000000..9cf98f5 Binary files /dev/null and b/output/20260409_160418/ch_events_monthly.png differ diff --git a/output/20260409_160418/ch_leading_monthly.png b/output/20260409_160418/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_160418/ch_leading_monthly.png differ diff --git a/output/20260409_160418/ch_llc_topics.png b/output/20260409_160418/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_160418/ch_llc_topics.png differ diff --git a/output/20260409_160418/ch_low_value_units.png b/output/20260409_160418/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_160418/ch_low_value_units.png differ diff --git a/output/20260409_160418/ch_overlay.png b/output/20260409_160418/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_160418/ch_overlay.png differ diff --git a/output/20260409_160418/ch_project_quadrant.png b/output/20260409_160418/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/20260409_160418/ch_project_quadrant.png differ diff --git a/output/20260409_160418/ch_quality_mix.png b/output/20260409_160418/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_160418/ch_quality_mix.png differ diff --git a/output/20260409_160418/ch_quality_trend.png b/output/20260409_160418/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_160418/ch_quality_trend.png differ diff --git a/output/20260409_160418/ch_serious_hotspots.png b/output/20260409_160418/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_160418/ch_serious_hotspots.png differ diff --git a/output/20260409_160418/ch_top_leaders.png 
b/output/20260409_160418/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_160418/ch_top_leaders.png differ diff --git a/output/20260409_160455/SHEQ_Safety_Performance_20260409_1605.pptx b/output/20260409_160455/SHEQ_Safety_Performance_20260409_1605.pptx new file mode 100644 index 0000000..6a65168 Binary files /dev/null and b/output/20260409_160455/SHEQ_Safety_Performance_20260409_1605.pptx differ diff --git a/output/20260409_160455/ch_activity_mix.png b/output/20260409_160455/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_160455/ch_activity_mix.png differ diff --git a/output/20260409_160455/ch_at_risk_themes.png b/output/20260409_160455/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_160455/ch_at_risk_themes.png differ diff --git a/output/20260409_160455/ch_bu_comparison.png b/output/20260409_160455/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_160455/ch_bu_comparison.png differ diff --git a/output/20260409_160455/ch_consequence.png b/output/20260409_160455/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_160455/ch_consequence.png differ diff --git a/output/20260409_160455/ch_crp_focus.png b/output/20260409_160455/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_160455/ch_crp_focus.png differ diff --git a/output/20260409_160455/ch_events_monthly.png b/output/20260409_160455/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_160455/ch_events_monthly.png differ diff --git a/output/20260409_160455/ch_leading_monthly.png b/output/20260409_160455/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and 
b/output/20260409_160455/ch_leading_monthly.png differ diff --git a/output/20260409_160455/ch_llc_topics.png b/output/20260409_160455/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_160455/ch_llc_topics.png differ diff --git a/output/20260409_160455/ch_low_value_units.png b/output/20260409_160455/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_160455/ch_low_value_units.png differ diff --git a/output/20260409_160455/ch_overlay.png b/output/20260409_160455/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_160455/ch_overlay.png differ diff --git a/output/20260409_160455/ch_project_quadrant.png b/output/20260409_160455/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/20260409_160455/ch_project_quadrant.png differ diff --git a/output/20260409_160455/ch_quality_mix.png b/output/20260409_160455/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_160455/ch_quality_mix.png differ diff --git a/output/20260409_160455/ch_quality_trend.png b/output/20260409_160455/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_160455/ch_quality_trend.png differ diff --git a/output/20260409_160455/ch_serious_hotspots.png b/output/20260409_160455/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_160455/ch_serious_hotspots.png differ diff --git a/output/20260409_160455/ch_top_leaders.png b/output/20260409_160455/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_160455/ch_top_leaders.png differ diff --git a/output/20260409_161607/SHEQ_Safety_Performance_20260409_1616.pptx 
b/output/20260409_161607/SHEQ_Safety_Performance_20260409_1616.pptx new file mode 100644 index 0000000..1ffc33a Binary files /dev/null and b/output/20260409_161607/SHEQ_Safety_Performance_20260409_1616.pptx differ diff --git a/output/20260409_161607/ch_activity_mix.png b/output/20260409_161607/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_161607/ch_activity_mix.png differ diff --git a/output/20260409_161607/ch_at_risk_themes.png b/output/20260409_161607/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_161607/ch_at_risk_themes.png differ diff --git a/output/20260409_161607/ch_bu_comparison.png b/output/20260409_161607/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_161607/ch_bu_comparison.png differ diff --git a/output/20260409_161607/ch_consequence.png b/output/20260409_161607/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_161607/ch_consequence.png differ diff --git a/output/20260409_161607/ch_crp_focus.png b/output/20260409_161607/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_161607/ch_crp_focus.png differ diff --git a/output/20260409_161607/ch_events_monthly.png b/output/20260409_161607/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_161607/ch_events_monthly.png differ diff --git a/output/20260409_161607/ch_leading_monthly.png b/output/20260409_161607/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_161607/ch_leading_monthly.png differ diff --git a/output/20260409_161607/ch_llc_topics.png b/output/20260409_161607/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and 
b/output/20260409_161607/ch_llc_topics.png differ diff --git a/output/20260409_161607/ch_low_value_units.png b/output/20260409_161607/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_161607/ch_low_value_units.png differ diff --git a/output/20260409_161607/ch_overlay.png b/output/20260409_161607/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_161607/ch_overlay.png differ diff --git a/output/20260409_161607/ch_project_quadrant.png b/output/20260409_161607/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/20260409_161607/ch_project_quadrant.png differ diff --git a/output/20260409_161607/ch_quality_mix.png b/output/20260409_161607/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_161607/ch_quality_mix.png differ diff --git a/output/20260409_161607/ch_quality_trend.png b/output/20260409_161607/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_161607/ch_quality_trend.png differ diff --git a/output/20260409_161607/ch_serious_hotspots.png b/output/20260409_161607/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_161607/ch_serious_hotspots.png differ diff --git a/output/20260409_161607/ch_top_leaders.png b/output/20260409_161607/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_161607/ch_top_leaders.png differ diff --git a/output/20260409_161722/SHEQ_Safety_Performance_20260409_1617.docx b/output/20260409_161722/SHEQ_Safety_Performance_20260409_1617.docx new file mode 100644 index 0000000..e3fdd11 Binary files /dev/null and b/output/20260409_161722/SHEQ_Safety_Performance_20260409_1617.docx differ diff --git a/output/20260409_161722/ch_activity_mix.png 
b/output/20260409_161722/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_161722/ch_activity_mix.png differ diff --git a/output/20260409_161722/ch_at_risk_themes.png b/output/20260409_161722/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/20260409_161722/ch_at_risk_themes.png differ diff --git a/output/20260409_161722/ch_bu_comparison.png b/output/20260409_161722/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_161722/ch_bu_comparison.png differ diff --git a/output/20260409_161722/ch_consequence.png b/output/20260409_161722/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_161722/ch_consequence.png differ diff --git a/output/20260409_161722/ch_crp_focus.png b/output/20260409_161722/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_161722/ch_crp_focus.png differ diff --git a/output/20260409_161722/ch_events_monthly.png b/output/20260409_161722/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_161722/ch_events_monthly.png differ diff --git a/output/20260409_161722/ch_leading_monthly.png b/output/20260409_161722/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_161722/ch_leading_monthly.png differ diff --git a/output/20260409_161722/ch_llc_topics.png b/output/20260409_161722/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_161722/ch_llc_topics.png differ diff --git a/output/20260409_161722/ch_low_value_units.png b/output/20260409_161722/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_161722/ch_low_value_units.png differ diff --git 
a/output/20260409_161722/ch_overlay.png b/output/20260409_161722/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_161722/ch_overlay.png differ diff --git a/output/20260409_161722/ch_project_quadrant.png b/output/20260409_161722/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/20260409_161722/ch_project_quadrant.png differ diff --git a/output/20260409_161722/ch_quality_mix.png b/output/20260409_161722/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_161722/ch_quality_mix.png differ diff --git a/output/20260409_161722/ch_quality_trend.png b/output/20260409_161722/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_161722/ch_quality_trend.png differ diff --git a/output/20260409_161722/ch_serious_hotspots.png b/output/20260409_161722/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_161722/ch_serious_hotspots.png differ diff --git a/output/20260409_161722/ch_top_leaders.png b/output/20260409_161722/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_161722/ch_top_leaders.png differ diff --git a/output/20260409_171650/SHEQ_Safety_Performance_20260409_1717.docx b/output/20260409_171650/SHEQ_Safety_Performance_20260409_1717.docx new file mode 100644 index 0000000..00e50ef Binary files /dev/null and b/output/20260409_171650/SHEQ_Safety_Performance_20260409_1717.docx differ diff --git a/output/20260409_171650/ch_activity_mix.png b/output/20260409_171650/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/20260409_171650/ch_activity_mix.png differ diff --git a/output/20260409_171650/ch_at_risk_themes.png b/output/20260409_171650/ch_at_risk_themes.png new file mode 100644 index 
0000000..cf70134 Binary files /dev/null and b/output/20260409_171650/ch_at_risk_themes.png differ diff --git a/output/20260409_171650/ch_bu_comparison.png b/output/20260409_171650/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/20260409_171650/ch_bu_comparison.png differ diff --git a/output/20260409_171650/ch_consequence.png b/output/20260409_171650/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/20260409_171650/ch_consequence.png differ diff --git a/output/20260409_171650/ch_crp_focus.png b/output/20260409_171650/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/20260409_171650/ch_crp_focus.png differ diff --git a/output/20260409_171650/ch_events_monthly.png b/output/20260409_171650/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/20260409_171650/ch_events_monthly.png differ diff --git a/output/20260409_171650/ch_leading_monthly.png b/output/20260409_171650/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/20260409_171650/ch_leading_monthly.png differ diff --git a/output/20260409_171650/ch_llc_topics.png b/output/20260409_171650/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/20260409_171650/ch_llc_topics.png differ diff --git a/output/20260409_171650/ch_low_value_units.png b/output/20260409_171650/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/20260409_171650/ch_low_value_units.png differ diff --git a/output/20260409_171650/ch_overlay.png b/output/20260409_171650/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/20260409_171650/ch_overlay.png differ diff --git a/output/20260409_171650/ch_project_quadrant.png b/output/20260409_171650/ch_project_quadrant.png new 
file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/20260409_171650/ch_project_quadrant.png differ diff --git a/output/20260409_171650/ch_quality_mix.png b/output/20260409_171650/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/20260409_171650/ch_quality_mix.png differ diff --git a/output/20260409_171650/ch_quality_trend.png b/output/20260409_171650/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/20260409_171650/ch_quality_trend.png differ diff --git a/output/20260409_171650/ch_serious_hotspots.png b/output/20260409_171650/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/20260409_171650/ch_serious_hotspots.png differ diff --git a/output/20260409_171650/ch_top_leaders.png b/output/20260409_171650/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/20260409_171650/ch_top_leaders.png differ diff --git a/output/board_visuals/SHEQ_Safety_Performance_20260409_1548.docx b/output/board_visuals/SHEQ_Safety_Performance_20260409_1548.docx new file mode 100644 index 0000000..fa96bc4 Binary files /dev/null and b/output/board_visuals/SHEQ_Safety_Performance_20260409_1548.docx differ diff --git a/output/board_visuals/ch_activity_mix.png b/output/board_visuals/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/board_visuals/ch_activity_mix.png differ diff --git a/output/board_visuals/ch_at_risk_themes.png b/output/board_visuals/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/board_visuals/ch_at_risk_themes.png differ diff --git a/output/board_visuals/ch_bu_comparison.png b/output/board_visuals/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/board_visuals/ch_bu_comparison.png differ diff --git 
a/output/board_visuals/ch_consequence.png b/output/board_visuals/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/board_visuals/ch_consequence.png differ diff --git a/output/board_visuals/ch_crp_focus.png b/output/board_visuals/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/board_visuals/ch_crp_focus.png differ diff --git a/output/board_visuals/ch_events_monthly.png b/output/board_visuals/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/board_visuals/ch_events_monthly.png differ diff --git a/output/board_visuals/ch_leading_monthly.png b/output/board_visuals/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/board_visuals/ch_leading_monthly.png differ diff --git a/output/board_visuals/ch_llc_topics.png b/output/board_visuals/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/board_visuals/ch_llc_topics.png differ diff --git a/output/board_visuals/ch_low_value_units.png b/output/board_visuals/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/board_visuals/ch_low_value_units.png differ diff --git a/output/board_visuals/ch_overlay.png b/output/board_visuals/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/board_visuals/ch_overlay.png differ diff --git a/output/board_visuals/ch_quality_mix.png b/output/board_visuals/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/board_visuals/ch_quality_mix.png differ diff --git a/output/board_visuals/ch_quality_trend.png b/output/board_visuals/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/board_visuals/ch_quality_trend.png differ diff --git a/output/board_visuals/ch_serious_hotspots.png 
b/output/board_visuals/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/board_visuals/ch_serious_hotspots.png differ diff --git a/output/board_visuals/ch_top_leaders.png b/output/board_visuals/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/board_visuals/ch_top_leaders.png differ diff --git a/output/board_visuals2/SHEQ_Safety_Performance_20260409_1549.docx b/output/board_visuals2/SHEQ_Safety_Performance_20260409_1549.docx new file mode 100644 index 0000000..151b19c Binary files /dev/null and b/output/board_visuals2/SHEQ_Safety_Performance_20260409_1549.docx differ diff --git a/output/board_visuals2/ch_activity_mix.png b/output/board_visuals2/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/board_visuals2/ch_activity_mix.png differ diff --git a/output/board_visuals2/ch_at_risk_themes.png b/output/board_visuals2/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/board_visuals2/ch_at_risk_themes.png differ diff --git a/output/board_visuals2/ch_bu_comparison.png b/output/board_visuals2/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/board_visuals2/ch_bu_comparison.png differ diff --git a/output/board_visuals2/ch_consequence.png b/output/board_visuals2/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/board_visuals2/ch_consequence.png differ diff --git a/output/board_visuals2/ch_crp_focus.png b/output/board_visuals2/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/board_visuals2/ch_crp_focus.png differ diff --git a/output/board_visuals2/ch_events_monthly.png b/output/board_visuals2/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/board_visuals2/ch_events_monthly.png 
differ diff --git a/output/board_visuals2/ch_leading_monthly.png b/output/board_visuals2/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/board_visuals2/ch_leading_monthly.png differ diff --git a/output/board_visuals2/ch_llc_topics.png b/output/board_visuals2/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/board_visuals2/ch_llc_topics.png differ diff --git a/output/board_visuals2/ch_low_value_units.png b/output/board_visuals2/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/board_visuals2/ch_low_value_units.png differ diff --git a/output/board_visuals2/ch_overlay.png b/output/board_visuals2/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/board_visuals2/ch_overlay.png differ diff --git a/output/board_visuals2/ch_project_quadrant.png b/output/board_visuals2/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/board_visuals2/ch_project_quadrant.png differ diff --git a/output/board_visuals2/ch_quality_mix.png b/output/board_visuals2/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/board_visuals2/ch_quality_mix.png differ diff --git a/output/board_visuals2/ch_quality_trend.png b/output/board_visuals2/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/board_visuals2/ch_quality_trend.png differ diff --git a/output/board_visuals2/ch_serious_hotspots.png b/output/board_visuals2/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/board_visuals2/ch_serious_hotspots.png differ diff --git a/output/board_visuals2/ch_top_leaders.png b/output/board_visuals2/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and 
b/output/board_visuals2/ch_top_leaders.png differ diff --git a/output/export_verify/SHEQ_Safety_Performance_20260409_1604.docx b/output/export_verify/SHEQ_Safety_Performance_20260409_1604.docx new file mode 100644 index 0000000..17726e6 Binary files /dev/null and b/output/export_verify/SHEQ_Safety_Performance_20260409_1604.docx differ diff --git a/output/export_verify/SHEQ_Safety_Performance_20260409_1604.pptx b/output/export_verify/SHEQ_Safety_Performance_20260409_1604.pptx new file mode 100644 index 0000000..cc52bc3 Binary files /dev/null and b/output/export_verify/SHEQ_Safety_Performance_20260409_1604.pptx differ diff --git a/output/export_verify/ch_activity_mix.png b/output/export_verify/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/export_verify/ch_activity_mix.png differ diff --git a/output/export_verify/ch_at_risk_themes.png b/output/export_verify/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/export_verify/ch_at_risk_themes.png differ diff --git a/output/export_verify/ch_bu_comparison.png b/output/export_verify/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/export_verify/ch_bu_comparison.png differ diff --git a/output/export_verify/ch_consequence.png b/output/export_verify/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/export_verify/ch_consequence.png differ diff --git a/output/export_verify/ch_crp_focus.png b/output/export_verify/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/export_verify/ch_crp_focus.png differ diff --git a/output/export_verify/ch_events_monthly.png b/output/export_verify/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/export_verify/ch_events_monthly.png differ diff --git a/output/export_verify/ch_leading_monthly.png 
b/output/export_verify/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/export_verify/ch_leading_monthly.png differ diff --git a/output/export_verify/ch_llc_topics.png b/output/export_verify/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/export_verify/ch_llc_topics.png differ diff --git a/output/export_verify/ch_low_value_units.png b/output/export_verify/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/export_verify/ch_low_value_units.png differ diff --git a/output/export_verify/ch_overlay.png b/output/export_verify/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/export_verify/ch_overlay.png differ diff --git a/output/export_verify/ch_project_quadrant.png b/output/export_verify/ch_project_quadrant.png new file mode 100644 index 0000000..9a164f3 Binary files /dev/null and b/output/export_verify/ch_project_quadrant.png differ diff --git a/output/export_verify/ch_quality_mix.png b/output/export_verify/ch_quality_mix.png new file mode 100644 index 0000000..d4ce282 Binary files /dev/null and b/output/export_verify/ch_quality_mix.png differ diff --git a/output/export_verify/ch_quality_trend.png b/output/export_verify/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/export_verify/ch_quality_trend.png differ diff --git a/output/export_verify/ch_serious_hotspots.png b/output/export_verify/ch_serious_hotspots.png new file mode 100644 index 0000000..5ad0b4d Binary files /dev/null and b/output/export_verify/ch_serious_hotspots.png differ diff --git a/output/export_verify/ch_top_leaders.png b/output/export_verify/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/export_verify/ch_top_leaders.png differ diff --git a/output/modern_report/ch_activity_mix.png 
b/output/modern_report/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/modern_report/ch_activity_mix.png differ diff --git a/output/modern_report/ch_at_risk_themes.png b/output/modern_report/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/modern_report/ch_at_risk_themes.png differ diff --git a/output/modern_report/ch_bu_comparison.png b/output/modern_report/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/modern_report/ch_bu_comparison.png differ diff --git a/output/modern_report/ch_consequence.png b/output/modern_report/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/modern_report/ch_consequence.png differ diff --git a/output/modern_report/ch_crp_focus.png b/output/modern_report/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/modern_report/ch_crp_focus.png differ diff --git a/output/modern_report/ch_events_monthly.png b/output/modern_report/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/modern_report/ch_events_monthly.png differ diff --git a/output/modern_report/ch_leading_monthly.png b/output/modern_report/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/modern_report/ch_leading_monthly.png differ diff --git a/output/modern_report/ch_llc_topics.png b/output/modern_report/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/modern_report/ch_llc_topics.png differ diff --git a/output/modern_report/ch_low_value_units.png b/output/modern_report/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/modern_report/ch_low_value_units.png differ diff --git a/output/modern_report/ch_overlay.png 
b/output/modern_report/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/modern_report/ch_overlay.png differ diff --git a/output/modern_report/ch_quality_trend.png b/output/modern_report/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/modern_report/ch_quality_trend.png differ diff --git a/output/modern_report/ch_top_leaders.png b/output/modern_report/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/modern_report/ch_top_leaders.png differ diff --git a/output/modern_report2/SHEQ_Safety_Performance_20260409_1536.docx b/output/modern_report2/SHEQ_Safety_Performance_20260409_1536.docx new file mode 100644 index 0000000..af51230 Binary files /dev/null and b/output/modern_report2/SHEQ_Safety_Performance_20260409_1536.docx differ diff --git a/output/modern_report2/ch_activity_mix.png b/output/modern_report2/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/modern_report2/ch_activity_mix.png differ diff --git a/output/modern_report2/ch_at_risk_themes.png b/output/modern_report2/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/modern_report2/ch_at_risk_themes.png differ diff --git a/output/modern_report2/ch_bu_comparison.png b/output/modern_report2/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/modern_report2/ch_bu_comparison.png differ diff --git a/output/modern_report2/ch_consequence.png b/output/modern_report2/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/modern_report2/ch_consequence.png differ diff --git a/output/modern_report2/ch_crp_focus.png b/output/modern_report2/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/modern_report2/ch_crp_focus.png differ diff --git 
a/output/modern_report2/ch_events_monthly.png b/output/modern_report2/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/modern_report2/ch_events_monthly.png differ diff --git a/output/modern_report2/ch_leading_monthly.png b/output/modern_report2/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/modern_report2/ch_leading_monthly.png differ diff --git a/output/modern_report2/ch_llc_topics.png b/output/modern_report2/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/modern_report2/ch_llc_topics.png differ diff --git a/output/modern_report2/ch_low_value_units.png b/output/modern_report2/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/modern_report2/ch_low_value_units.png differ diff --git a/output/modern_report2/ch_overlay.png b/output/modern_report2/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/modern_report2/ch_overlay.png differ diff --git a/output/modern_report2/ch_quality_trend.png b/output/modern_report2/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/modern_report2/ch_quality_trend.png differ diff --git a/output/modern_report2/ch_top_leaders.png b/output/modern_report2/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/modern_report2/ch_top_leaders.png differ diff --git a/output/smoke/ch_activity_mix.png b/output/smoke/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/smoke/ch_activity_mix.png differ diff --git a/output/smoke/ch_at_risk_themes.png b/output/smoke/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/smoke/ch_at_risk_themes.png differ diff --git a/output/smoke/ch_bu_comparison.png 
b/output/smoke/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/smoke/ch_bu_comparison.png differ diff --git a/output/smoke/ch_consequence.png b/output/smoke/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/smoke/ch_consequence.png differ diff --git a/output/smoke/ch_crp_focus.png b/output/smoke/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/smoke/ch_crp_focus.png differ diff --git a/output/smoke/ch_events_monthly.png b/output/smoke/ch_events_monthly.png new file mode 100644 index 0000000..0d16ed1 Binary files /dev/null and b/output/smoke/ch_events_monthly.png differ diff --git a/output/smoke/ch_leading_monthly.png b/output/smoke/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/smoke/ch_leading_monthly.png differ diff --git a/output/smoke/ch_llc_topics.png b/output/smoke/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/smoke/ch_llc_topics.png differ diff --git a/output/smoke/ch_low_value_units.png b/output/smoke/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/smoke/ch_low_value_units.png differ diff --git a/output/smoke/ch_overlay.png b/output/smoke/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/smoke/ch_overlay.png differ diff --git a/output/smoke/ch_top_leaders.png b/output/smoke/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/smoke/ch_top_leaders.png differ diff --git a/output/smoke2/SHEQ_Safety_Performance_20260409_1455.docx b/output/smoke2/SHEQ_Safety_Performance_20260409_1455.docx new file mode 100644 index 0000000..1963f92 Binary files /dev/null and b/output/smoke2/SHEQ_Safety_Performance_20260409_1455.docx differ diff --git 
a/output/smoke2/ch_activity_mix.png b/output/smoke2/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/smoke2/ch_activity_mix.png differ diff --git a/output/smoke2/ch_at_risk_themes.png b/output/smoke2/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/smoke2/ch_at_risk_themes.png differ diff --git a/output/smoke2/ch_bu_comparison.png b/output/smoke2/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/smoke2/ch_bu_comparison.png differ diff --git a/output/smoke2/ch_consequence.png b/output/smoke2/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/smoke2/ch_consequence.png differ diff --git a/output/smoke2/ch_crp_focus.png b/output/smoke2/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/smoke2/ch_crp_focus.png differ diff --git a/output/smoke2/ch_events_monthly.png b/output/smoke2/ch_events_monthly.png new file mode 100644 index 0000000..0d16ed1 Binary files /dev/null and b/output/smoke2/ch_events_monthly.png differ diff --git a/output/smoke2/ch_leading_monthly.png b/output/smoke2/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/smoke2/ch_leading_monthly.png differ diff --git a/output/smoke2/ch_llc_topics.png b/output/smoke2/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/smoke2/ch_llc_topics.png differ diff --git a/output/smoke2/ch_low_value_units.png b/output/smoke2/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/smoke2/ch_low_value_units.png differ diff --git a/output/smoke2/ch_overlay.png b/output/smoke2/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/smoke2/ch_overlay.png differ diff --git 
a/output/smoke2/ch_quality_trend.png b/output/smoke2/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/smoke2/ch_quality_trend.png differ diff --git a/output/smoke2/ch_top_leaders.png b/output/smoke2/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/smoke2/ch_top_leaders.png differ diff --git a/output/smoke3/SHEQ_Safety_Performance_20260409_1508.docx b/output/smoke3/SHEQ_Safety_Performance_20260409_1508.docx new file mode 100644 index 0000000..52c9326 Binary files /dev/null and b/output/smoke3/SHEQ_Safety_Performance_20260409_1508.docx differ diff --git a/output/smoke3/ch_activity_mix.png b/output/smoke3/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/smoke3/ch_activity_mix.png differ diff --git a/output/smoke3/ch_at_risk_themes.png b/output/smoke3/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/smoke3/ch_at_risk_themes.png differ diff --git a/output/smoke3/ch_bu_comparison.png b/output/smoke3/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/smoke3/ch_bu_comparison.png differ diff --git a/output/smoke3/ch_consequence.png b/output/smoke3/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/smoke3/ch_consequence.png differ diff --git a/output/smoke3/ch_crp_focus.png b/output/smoke3/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/smoke3/ch_crp_focus.png differ diff --git a/output/smoke3/ch_events_monthly.png b/output/smoke3/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/smoke3/ch_events_monthly.png differ diff --git a/output/smoke3/ch_leading_monthly.png b/output/smoke3/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and 
b/output/smoke3/ch_leading_monthly.png differ diff --git a/output/smoke3/ch_llc_topics.png b/output/smoke3/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/smoke3/ch_llc_topics.png differ diff --git a/output/smoke3/ch_low_value_units.png b/output/smoke3/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/smoke3/ch_low_value_units.png differ diff --git a/output/smoke3/ch_overlay.png b/output/smoke3/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/smoke3/ch_overlay.png differ diff --git a/output/smoke3/ch_quality_trend.png b/output/smoke3/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/smoke3/ch_quality_trend.png differ diff --git a/output/smoke3/ch_top_leaders.png b/output/smoke3/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/smoke3/ch_top_leaders.png differ diff --git a/output/smoke4/SHEQ_Safety_Performance_20260409_1510.docx b/output/smoke4/SHEQ_Safety_Performance_20260409_1510.docx new file mode 100644 index 0000000..f32a7f9 Binary files /dev/null and b/output/smoke4/SHEQ_Safety_Performance_20260409_1510.docx differ diff --git a/output/smoke4/ch_activity_mix.png b/output/smoke4/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/smoke4/ch_activity_mix.png differ diff --git a/output/smoke4/ch_at_risk_themes.png b/output/smoke4/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/smoke4/ch_at_risk_themes.png differ diff --git a/output/smoke4/ch_bu_comparison.png b/output/smoke4/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/smoke4/ch_bu_comparison.png differ diff --git a/output/smoke4/ch_consequence.png b/output/smoke4/ch_consequence.png new file mode 100644 index 
0000000..ef79267 Binary files /dev/null and b/output/smoke4/ch_consequence.png differ diff --git a/output/smoke4/ch_crp_focus.png b/output/smoke4/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/smoke4/ch_crp_focus.png differ diff --git a/output/smoke4/ch_events_monthly.png b/output/smoke4/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/smoke4/ch_events_monthly.png differ diff --git a/output/smoke4/ch_leading_monthly.png b/output/smoke4/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/smoke4/ch_leading_monthly.png differ diff --git a/output/smoke4/ch_llc_topics.png b/output/smoke4/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/smoke4/ch_llc_topics.png differ diff --git a/output/smoke4/ch_low_value_units.png b/output/smoke4/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary files /dev/null and b/output/smoke4/ch_low_value_units.png differ diff --git a/output/smoke4/ch_overlay.png b/output/smoke4/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/smoke4/ch_overlay.png differ diff --git a/output/smoke4/ch_quality_trend.png b/output/smoke4/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/smoke4/ch_quality_trend.png differ diff --git a/output/smoke4/ch_top_leaders.png b/output/smoke4/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/smoke4/ch_top_leaders.png differ diff --git a/output/smoke_input_depth/SHEQ_Safety_Performance_20260409_1525.docx b/output/smoke_input_depth/SHEQ_Safety_Performance_20260409_1525.docx new file mode 100644 index 0000000..35d42ad Binary files /dev/null and b/output/smoke_input_depth/SHEQ_Safety_Performance_20260409_1525.docx differ diff --git 
a/output/smoke_input_depth/ch_activity_mix.png b/output/smoke_input_depth/ch_activity_mix.png new file mode 100644 index 0000000..7eddf11 Binary files /dev/null and b/output/smoke_input_depth/ch_activity_mix.png differ diff --git a/output/smoke_input_depth/ch_at_risk_themes.png b/output/smoke_input_depth/ch_at_risk_themes.png new file mode 100644 index 0000000..cf70134 Binary files /dev/null and b/output/smoke_input_depth/ch_at_risk_themes.png differ diff --git a/output/smoke_input_depth/ch_bu_comparison.png b/output/smoke_input_depth/ch_bu_comparison.png new file mode 100644 index 0000000..aff89c4 Binary files /dev/null and b/output/smoke_input_depth/ch_bu_comparison.png differ diff --git a/output/smoke_input_depth/ch_consequence.png b/output/smoke_input_depth/ch_consequence.png new file mode 100644 index 0000000..ef79267 Binary files /dev/null and b/output/smoke_input_depth/ch_consequence.png differ diff --git a/output/smoke_input_depth/ch_crp_focus.png b/output/smoke_input_depth/ch_crp_focus.png new file mode 100644 index 0000000..25bba65 Binary files /dev/null and b/output/smoke_input_depth/ch_crp_focus.png differ diff --git a/output/smoke_input_depth/ch_events_monthly.png b/output/smoke_input_depth/ch_events_monthly.png new file mode 100644 index 0000000..9cf98f5 Binary files /dev/null and b/output/smoke_input_depth/ch_events_monthly.png differ diff --git a/output/smoke_input_depth/ch_leading_monthly.png b/output/smoke_input_depth/ch_leading_monthly.png new file mode 100644 index 0000000..aabe61b Binary files /dev/null and b/output/smoke_input_depth/ch_leading_monthly.png differ diff --git a/output/smoke_input_depth/ch_llc_topics.png b/output/smoke_input_depth/ch_llc_topics.png new file mode 100644 index 0000000..df94bee Binary files /dev/null and b/output/smoke_input_depth/ch_llc_topics.png differ diff --git a/output/smoke_input_depth/ch_low_value_units.png b/output/smoke_input_depth/ch_low_value_units.png new file mode 100644 index 0000000..37563f2 Binary 
files /dev/null and b/output/smoke_input_depth/ch_low_value_units.png differ diff --git a/output/smoke_input_depth/ch_overlay.png b/output/smoke_input_depth/ch_overlay.png new file mode 100644 index 0000000..952fb5a Binary files /dev/null and b/output/smoke_input_depth/ch_overlay.png differ diff --git a/output/smoke_input_depth/ch_quality_trend.png b/output/smoke_input_depth/ch_quality_trend.png new file mode 100644 index 0000000..4886e26 Binary files /dev/null and b/output/smoke_input_depth/ch_quality_trend.png differ diff --git a/output/smoke_input_depth/ch_top_leaders.png b/output/smoke_input_depth/ch_top_leaders.png new file mode 100644 index 0000000..0312a4e Binary files /dev/null and b/output/smoke_input_depth/ch_top_leaders.png differ diff --git a/ppt_builder.py b/ppt_builder.py new file mode 100644 index 0000000..e67effa --- /dev/null +++ b/ppt_builder.py @@ -0,0 +1,253 @@ +""" +ppt_builder.py — PPTX board-pack generation for the SHEQ Analysis Tool. +""" + +from __future__ import annotations + +import logging +import os +from datetime import datetime + +from pptx import Presentation +from pptx.dml.color import RGBColor +from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE +from pptx.enum.text import PP_ALIGN +from pptx.util import Inches, Pt + +from analysis_engine import AnalysisResults + +log = logging.getLogger(__name__) + +DEEP_BLUE = RGBColor(0x0B, 0x32, 0x54) +SKY_BLUE = RGBColor(0x13, 0xB5, 0xEA) +DARK_GREEN = RGBColor(0x00, 0x6E, 0x47) +AMBER = RGBColor(0xD9, 0x77, 0x06) +RED = RGBColor(0xDC, 0x26, 0x26) +GREY = RGBColor(0x64, 0x74, 0x8B) +OFF_WHITE = RGBColor(0xF0, 0xF5, 0xFA) +WHITE = RGBColor(0xFF, 0xFF, 0xFF) + + +def _slide_title(slide, title: str, subtitle: str | None = None) -> None: + tx = slide.shapes.add_textbox(Inches(0.45), Inches(0.25), Inches(12.1), Inches(0.8)) + tf = tx.text_frame + p = tf.paragraphs[0] + r = p.add_run() + r.text = title + r.font.name = "Source Sans Pro" + r.font.size = Pt(24) + r.font.bold = True + r.font.color.rgb = 
DEEP_BLUE + if subtitle: + p2 = tf.add_paragraph() + r2 = p2.add_run() + r2.text = subtitle + r2.font.name = "Source Sans Pro" + r2.font.size = Pt(11) + r2.font.color.rgb = GREY + + +def _add_banner(slide, title: str, subtitle: str) -> None: + shape = slide.shapes.add_shape(MSO_AUTO_SHAPE_TYPE.RECTANGLE, Inches(0), Inches(0), Inches(13.33), Inches(1.3)) + shape.fill.solid() + shape.fill.fore_color.rgb = DEEP_BLUE + shape.line.fill.background() + tf = shape.text_frame + tf.clear() + p = tf.paragraphs[0] + p.alignment = PP_ALIGN.LEFT + r = p.add_run() + r.text = title + r.font.name = "Source Sans Pro" + r.font.size = Pt(26) + r.font.bold = True + r.font.color.rgb = WHITE + p2 = tf.add_paragraph() + r2 = p2.add_run() + r2.text = subtitle + r2.font.name = "Source Sans Pro" + r2.font.size = Pt(12) + r2.font.color.rgb = RGBColor(0xD7, 0xF2, 0xFF) + + +def _add_text_card(slide, left, top, width, height, title: str, body: str, fill=OFF_WHITE, accent=SKY_BLUE) -> None: + shape = slide.shapes.add_shape(MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE, left, top, width, height) + shape.fill.solid() + shape.fill.fore_color.rgb = fill + shape.line.color.rgb = WHITE + tf = shape.text_frame + tf.clear() + p = tf.paragraphs[0] + r = p.add_run() + r.text = title + r.font.name = "Source Sans Pro" + r.font.size = Pt(11) + r.font.bold = True + r.font.color.rgb = accent + p2 = tf.add_paragraph() + r2 = p2.add_run() + r2.text = body + r2.font.name = "Source Sans Pro" + r2.font.size = Pt(10) + r2.font.color.rgb = DEEP_BLUE + + +def _add_metric_card(slide, left, top, width, height, label: str, value: str, fill=OFF_WHITE) -> None: + shape = slide.shapes.add_shape(MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE, left, top, width, height) + shape.fill.solid() + shape.fill.fore_color.rgb = fill + shape.line.color.rgb = WHITE + tf = shape.text_frame + tf.clear() + p = tf.paragraphs[0] + p.alignment = PP_ALIGN.CENTER + r = p.add_run() + r.text = label + r.font.name = "Source Sans Pro" + r.font.size = Pt(9) + 
r.font.color.rgb = GREY + p2 = tf.add_paragraph() + p2.alignment = PP_ALIGN.CENTER + r2 = p2.add_run() + r2.text = value + r2.font.name = "Source Sans Pro" + r2.font.size = Pt(18) + r2.font.bold = True + r2.font.color.rgb = DEEP_BLUE + + +def _add_bullets(slide, left, top, width, height, title: str, items: list[str], accent=DEEP_BLUE) -> None: + tx = slide.shapes.add_textbox(left, top, width, height) + tf = tx.text_frame + tf.clear() + p = tf.paragraphs[0] + r = p.add_run() + r.text = title + r.font.name = "Source Sans Pro" + r.font.size = Pt(15) + r.font.bold = True + r.font.color.rgb = accent + for item in items: + para = tf.add_paragraph() + para.level = 0 + para.text = item + para.font.name = "Source Sans Pro" + para.font.size = Pt(11) + para.font.color.rgb = DEEP_BLUE + para.bullet = True + + +def _add_chart(slide, image_path: str | None, left, top, width, height=None) -> None: + if image_path and os.path.exists(image_path): + if height is None: + slide.shapes.add_picture(image_path, left, top, width=width) + else: + slide.shapes.add_picture(image_path, left, top, width=width, height=height) + + +def build_presentation(results: AnalysisResults, output_dir: str) -> str: + os.makedirs(output_dir, exist_ok=True) + prs = Presentation() + prs.slide_width = Inches(13.333) + prs.slide_height = Inches(7.5) + blank = prs.slide_layouts[6] + + # Title + slide = prs.slides.add_slide(blank) + _add_banner(slide, "SHEQ Safety Performance Board Pack", "Safety Energy, event hotspots, and leadership action priorities") + dq = results.data_quality + _add_text_card( + slide, Inches(0.6), Inches(1.6), Inches(12.0), Inches(1.0), + "Scope", + f"Events: {dq.get('events', {}).get('date_from', 'N/A')} to {dq.get('events', {}).get('date_to', 'N/A')} | " + f"Safety Energy: {dq.get('safety_energy', {}).get('date_from', 'N/A')} to {dq.get('safety_energy', {}).get('date_to', 'N/A')}", + ) + ev = results.events_summary + lead = results.leading_summary + trends = results.trends + metric_y = 
Inches(3.0) + card_w = Inches(2.85) + gap = Inches(0.2) + metrics = [ + ("Events", str(ev.get("total", 0))), + ("Moderate+ Events", str(ev.get("serious_count", 0))), + ("MV Events", str(ev.get("motor_vehicle", {}).get("count", 0))), + ("CCC Avg Quality", f"{trends.get('activity_insights', {}).get('CCC', {}).get('avg_quality', 0):.1f}"), + ] + for i, (label, value) in enumerate(metrics): + _add_metric_card(slide, Inches(0.6) + i * (card_w + gap), metric_y, card_w, Inches(1.3), label, value) + _add_text_card(slide, Inches(0.6), Inches(4.9), Inches(12.0), Inches(1.2), "Generated", datetime.now().strftime("%d %B %Y")) + + # Executive summary + slide = prs.slides.add_slide(blank) + _slide_title(slide, "Executive Summary", "What leaders should know right now") + _add_chart(slide, results.charts.get("quality_mix"), Inches(0.55), Inches(1.1), Inches(5.9)) + _add_chart(slide, results.charts.get("project_quadrant"), Inches(6.75), Inches(1.1), Inches(5.95)) + _add_bullets(slide, Inches(0.6), Inches(4.55), Inches(6.0), Inches(2.2), "Key Messages", trends.get("executive_summary", [])[:4], accent=SKY_BLUE) + _add_bullets(slide, Inches(6.8), Inches(4.55), Inches(5.9), Inches(2.2), "Priority Actions", results.recommendations[:4], accent=RED) + + # Events hotspots + slide = prs.slides.add_slide(blank) + _slide_title(slide, "Event Hotspots", "Where event burden and serious consequences are concentrated") + _add_chart(slide, results.charts.get("serious_hotspots"), Inches(0.55), Inches(1.05), Inches(6.0)) + _add_chart(slide, results.charts.get("events_monthly"), Inches(6.75), Inches(1.05), Inches(5.95)) + event_notes = [] + if ev.get("serious_projects"): + top_p = next(iter(ev["serious_projects"].items())) + event_notes.append(f"Highest serious-event project: {top_p[0]} ({top_p[1]} serious events)") + if ev.get("serious_time_buckets"): + top_t = next(iter(ev["serious_time_buckets"].items())) + event_notes.append(f"Most common serious-event timing: {top_t[0]} ({top_t[1]} events)") + if 
ev.get("motor_vehicle", {}).get("count", 0): + event_notes.append( + f"Motor vehicle events: {ev['motor_vehicle']['count']} total, " + f"{ev['motor_vehicle'].get('serious_count', 0)} moderate+" + ) + _add_bullets(slide, Inches(0.6), Inches(4.75), Inches(12.0), Inches(1.9), "Leadership Focus", event_notes, accent=SKY_BLUE) + + # Leading activity quality + slide = prs.slides.add_slide(blank) + _slide_title(slide, "Leading Activity Quality", "Whether LLC, CCC, and OCC records look meaningful and actionable") + _add_chart(slide, results.charts.get("quality_trend"), Inches(0.55), Inches(1.0), Inches(6.0)) + _add_chart(slide, results.charts.get("quality_mix"), Inches(6.75), Inches(1.0), Inches(5.95)) + quality_notes = [] + for atype in ["CCC", "OCC", "LLC"]: + insight = trends.get("activity_insights", {}).get(atype, {}) + if insight: + quality_notes.append( + f"{atype}: quality {insight.get('avg_quality', 0):.1f}/100, shallow {insight.get('shallow_pct', 0):.1f}%, " + f"follow-up {insight.get('follow_up_pct', 0):.1f}%" + ) + _add_bullets(slide, Inches(0.6), Inches(4.85), Inches(12.0), Inches(1.7), "Quality Readout", quality_notes, accent=SKY_BLUE) + + # Projects and locations + slide = prs.slides.add_slide(blank) + _slide_title(slide, "Projects and Locations", "Which areas appear strongest and which need direct intervention") + _add_chart(slide, results.charts.get("project_quadrant"), Inches(0.55), Inches(1.05), Inches(6.0)) + _add_chart(slide, results.charts.get("low_value_units"), Inches(6.75), Inches(1.05), Inches(5.95)) + proj_watch = results.se_events_rel.get("project_comparison", {}).get("watch", [])[:3] + loc_watch = results.se_events_rel.get("location_comparison", {}).get("watch", [])[:3] + watch_items = [ + f"Project watch: {r.get('project')} | events {r.get('events')} | serious {r.get('serious_events')}" + for r in proj_watch + ] + [ + f"Location watch: {r.get('location')} | events {r.get('events')} | serious {r.get('serious_events')}" + for r in loc_watch + ] + 
_add_bullets(slide, Inches(0.6), Inches(4.85), Inches(12.0), Inches(1.7), "Watchlist", watch_items[:6], accent=RED) + + # Recommendations + slide = prs.slides.add_slide(blank) + _slide_title(slide, "Recommended Actions", "Executive actions generated from the analysis") + left_items = results.recommendations[:5] + right_items = results.recommendations[5:10] + _add_bullets(slide, Inches(0.6), Inches(1.15), Inches(5.8), Inches(5.7), "Immediate Priorities", left_items, accent=RED) + _add_bullets(slide, Inches(6.8), Inches(1.15), Inches(5.8), Inches(5.7), "Next Priorities", right_items, accent=SKY_BLUE) + + output_path = os.path.join( + output_dir, + f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.pptx", + ) + prs.save(output_path) + log.info("PPTX saved to %s", output_path) + return output_path diff --git a/report_builder.py b/report_builder.py new file mode 100644 index 0000000..d2fbf36 --- /dev/null +++ b/report_builder.py @@ -0,0 +1,1157 @@ +""" +report_builder.py — DOCX report generation for the SHEQ Analysis Tool. + +Takes an AnalysisResults object from analysis_engine and produces a +fully-formatted DOCX report following the Ventia brand guidelines in +DESIGN.md. + +Public API +---------- +build_report(results: AnalysisResults, output_dir: str) -> str + Returns the path to the generated .docx file. 
+""" + +from __future__ import annotations + +import logging +import os +from datetime import datetime +from typing import Any + +import pandas as pd +from docx import Document +from docx.enum.table import WD_TABLE_ALIGNMENT +from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.oxml import parse_xml +from docx.oxml.ns import nsdecls +from docx.shared import Cm, Inches, Pt, RGBColor + +from analysis_engine import AnalysisResults +from config import ( + CONSEQUENCE_ORDER, + DEEP_BLUE, SKY_BLUE, DARK_GREEN, MUTED, + AMBER, RED, +) + +log = logging.getLogger(__name__) + +# ── Brand RGBColor objects ──────────────────────────────────────────────────── +NAVY = RGBColor(0x0B, 0x32, 0x54) # Deep Blue +TEAL = RGBColor(0x13, 0xB5, 0xEA) # Sky Blue +GREEN = RGBColor(0x00, 0x6E, 0x47) # Dark Green +GREY = RGBColor(0x64, 0x74, 0x8B) # Muted / slate grey +ALERT = RGBColor(0xDC, 0x26, 0x26) # Red + + +# ───────────────────────────────────────────────────────────────────────────── +# Low-level DOCX helpers +# ───────────────────────────────────────────────────────────────────────────── + +def _shading(cell, hex_color: str) -> None: + """Apply background fill to a table cell.""" + shd = parse_xml( + f'' + ) + cell._tc.get_or_add_tcPr().append(shd) + + +def _run(para, text: str, bold: bool = False, size_pt: int = 11, + colour: RGBColor = NAVY, italic: bool = False) -> None: + run = para.add_run(text) + run.bold = bold + run.italic = italic + run.font.size = Pt(size_pt) + if isinstance(colour, str): + colour = RGBColor.from_string(colour.replace("#", "").upper()) + run.font.color.rgb = colour + run.font.name = "Source Sans Pro" + + +def _heading(doc: Document, text: str, level: int) -> None: + doc.add_heading(text, level=level) + + +def _para(doc: Document, text: str = "", bold: bool = False, + size_pt: int = 11, colour: RGBColor = NAVY) -> None: + p = doc.add_paragraph() + _run(p, text, bold=bold, size_pt=size_pt, colour=colour) + + +def _bullet(doc: Document, text: str, 
size_pt: int = 11) -> None: + p = doc.add_paragraph(style="List Bullet") + _run(p, text, size_pt=size_pt, colour=NAVY) + + +def _callout(doc: Document, title: str, text: str, + fill: str = "f0f5fa", accent: RGBColor = TEAL) -> None: + table = doc.add_table(rows=1, cols=1) + table.alignment = WD_TABLE_ALIGNMENT.LEFT + table.style = "Table Grid" + cell = table.cell(0, 0) + _shading(cell, fill) + cell.text = "" + p1 = cell.paragraphs[0] + _run(p1, title, bold=True, size_pt=11, colour=accent) + p2 = cell.add_paragraph() + _run(p2, text, size_pt=10, colour=NAVY) + + +def _metric_cards(doc: Document, cards: list[tuple[str, str]], + cols: int = 4, fill: str = "f0f5fa") -> None: + if not cards: + return + rows = (len(cards) + cols - 1) // cols + table = doc.add_table(rows=rows, cols=cols) + table.alignment = WD_TABLE_ALIGNMENT.LEFT + table.style = "Table Grid" + idx = 0 + for r in range(rows): + for c in range(cols): + cell = table.cell(r, c) + cell.text = "" + _shading(cell, fill if idx % 2 == 0 else "ffffff") + if idx < len(cards): + title, value = cards[idx] + p1 = cell.paragraphs[0] + _run(p1, title, bold=False, size_pt=8, colour=GREY) + p1.alignment = WD_ALIGN_PARAGRAPH.CENTER + p2 = cell.add_paragraph() + _run(p2, value, bold=True, size_pt=16, colour=NAVY) + p2.alignment = WD_ALIGN_PARAGRAPH.CENTER + idx += 1 + + +def _add_table( + doc: Document, + headers: list[str], + rows: list[list[str]], + col_widths: list[float], # inches + header_fill: str = "0b3254", + alt_fill: str = "f0f5fa", +) -> None: + """Add a brand-styled table with a Deep Blue header row.""" + table = doc.add_table(rows=1 + len(rows), cols=len(headers)) + table.alignment = WD_TABLE_ALIGNMENT.LEFT + table.style = "Table Grid" + + # Header row + for i, h in enumerate(headers): + cell = table.rows[0].cells[i] + cell.text = "" + p = cell.paragraphs[0] + run = p.add_run(h) + run.bold = True + run.font.size = Pt(9) + run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) + run.font.name = "Source Sans Pro" + 
_shading(cell, header_fill) + + # Data rows + for ri, row in enumerate(rows): + for ci, val in enumerate(row): + cell = table.rows[ri + 1].cells[ci] + cell.text = "" + p = cell.paragraphs[0] + run = p.add_run(str(val) if val is not None else "—") + run.font.size = Pt(9) + run.font.name = "Source Sans Pro" + bg = alt_fill if ri % 2 == 0 else "ffffff" + _shading(cell, bg) + + # Column widths + for ri, row in enumerate(table.rows): + for ci, w in enumerate(col_widths): + if ci < len(row.cells): + row.cells[ci].width = Inches(w) + + +def _add_chart(doc: Document, charts: dict[str, str], key: str, + width_in: float = 5.5) -> None: + """Insert a chart image if it exists.""" + path = charts.get(key) + if path and os.path.exists(path): + doc.add_picture(path, width=Inches(width_in)) + else: + _para(doc, f"[Chart '{key}' not available]", colour=GREY, size_pt=9) + + +def _spacer(doc: Document) -> None: + doc.add_paragraph("") + + +def _page_break(doc: Document) -> None: + doc.add_page_break() + + +# ───────────────────────────────────────────────────────────────────────────── +# Document bootstrap +# ───────────────────────────────────────────────────────────────────────────── + +def _bootstrap_styles(doc: Document) -> None: + """Apply brand typography to the Document's built-in styles.""" + style = doc.styles["Normal"] + style.font.name = "Source Sans Pro" + style.font.size = Pt(11) + + for level, size, colour in [ + (1, 16, NAVY), + (2, 13, TEAL), + (3, 11, NAVY), + ]: + hs = doc.styles[f"Heading {level}"] + hs.font.name = "Source Sans Pro" + hs.font.size = Pt(size) + hs.font.color.rgb = colour + hs.font.bold = True + + sec = doc.sections[0] + sec.top_margin = Cm(1.5) + sec.bottom_margin = Cm(1.4) + sec.left_margin = Cm(1.6) + sec.right_margin = Cm(1.6) + + +# ───────────────────────────────────────────────────────────────────────────── +# Title page +# ───────────────────────────────────────────────────────────────────────────── + +def _title_page(doc: Document, results: 
AnalysisResults) -> None: + dq = results.data_quality + + ev_from = dq.get("events", {}).get("date_from", "N/A") + ev_to = dq.get("events", {}).get("date_to", "N/A") + se_from = dq.get("safety_energy", {}).get("date_from", "N/A") + se_to = dq.get("safety_energy", {}).get("date_to", "N/A") + + banner = doc.add_table(rows=1, cols=1) + banner.style = "Table Grid" + cell = banner.cell(0, 0) + _shading(cell, "0b3254") + cell.text = "" + p = cell.paragraphs[0] + p.alignment = WD_ALIGN_PARAGRAPH.LEFT + _run(p, "SHEQ Safety Performance Report", bold=True, size_pt=26, colour=RGBColor(0xFF, 0xFF, 0xFF)) + p2 = cell.add_paragraph() + _run(p2, "Safety Energy, event risk, and leadership focus areas", size_pt=13, colour=RGBColor(0xD7, 0xF2, 0xFF)) + + _spacer(doc) + _callout( + doc, + "Report Scope", + f"Events window: {ev_from} – {ev_to}. Leading activity window: {se_from} – {se_to}. " + "Built for senior leaders as a concise decision-support pack rather than a compliance summary.", + fill="eef6fb", + accent=TEAL, + ) + _spacer(doc) + _metric_cards(doc, [ + ("Generated", datetime.now().strftime("%d %b %Y")), + ("Events Coverage", f"{ev_from} to {ev_to}"), + ("Safety Energy Coverage", f"{se_from} to {se_to}"), + ("Audience", "Executive / Board"), + ], cols=2) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 1. Executive Summary +# ───────────────────────────────────────────────────────────────────────────── + +def _section_executive_summary(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "1. 
Executive Summary", 1) + ev = results.events_summary + lead = results.leading_summary + trends = results.trends + recs = results.recommendations + + totals = lead.get("totals", {}) + + _callout( + doc, + "Leadership Readout", + "This summary highlights where event risk is concentrated, where leading activities appear strongest or weakest, " + "and what senior leaders should prioritise next.", + fill="eef6fb", + accent=TEAL, + ) + _spacer(doc) + _metric_cards(doc, [ + ("Total Events", str(ev.get("total", 0))), + ("Moderate+ Events", f"{ev.get('serious_count', 0)}"), + ("Events / Month", f"{ev.get('events_per_month', 0):.1f}"), + ("Motor Vehicle Events", str(ev.get("motor_vehicle", {}).get("count", 0))), + ("LLCs", str(totals.get("LLC", 0))), + ("CCCs", str(totals.get("CCC", 0))), + ("OCCs", str(totals.get("OCC", 0))), + ("CCC Avg Quality", f"{trends.get('activity_insights', {}).get('CCC', {}).get('avg_quality', 0):.1f}"), + ], cols=4) + _spacer(doc) + + _heading(doc, "1.1 What Leaders Should Know", 2) + for rec in trends.get("executive_summary", [])[:4]: + _bullet(doc, rec) + _spacer(doc) + + _heading(doc, "1.2 Board Visual Snapshot", 2) + _add_chart(doc, results.charts, "quality_mix", 6.0) + _spacer(doc) + _add_chart(doc, results.charts, "project_quadrant", 6.0) + _spacer(doc) + + _heading(doc, "1.3 Priority Actions", 2) + for rec in recs[:5]: + _callout(doc, "Recommended Action", rec, fill="f7fbff", accent=NAVY) + _spacer(doc) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 2. Data Quality +# ───────────────────────────────────────────────────────────────────────────── + +def _section_data_quality(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "2. Data Quality and Coverage", 1) + dq = results.data_quality + + _para(doc, + "This section summarises the completeness and date coverage of each data source. 
" + "Any gaps identified here may affect the reliability of subsequent analysis sections.", + size_pt=11, colour=NAVY) + _spacer(doc) + + sources = [ + ("Events", dq.get("events", {}), + [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"), + ("Null — Event Type", "null_event_type"), ("Null — Consequence", "null_consequence"), + ("Null — Business Unit", "null_business_unit"), ("Null — Root Cause", "null_root_cause")]), + ("Safety Energy", dq.get("safety_energy", {}), + [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"), + ("Null — Leader", "null_leader"), ("Null — Business Unit", "null_bu")]), + ("LLC Data", dq.get("llc", {}), + [("Rows", "rows"), ("Date From", "date_from"), ("Date To", "date_to"), + ("Null — Topic", "null_topic"), ("Null — Leader", "null_leader")]), + ] + + for label, data, fields in sources: + _heading(doc, label, 2) + rows = [[f, str(data.get(k, "N/A"))] for f, k in fields] + _add_table(doc, ["Field", "Value"], rows, [3.0, 3.5]) + _spacer(doc) + + # Activity type breakdown for Safety Energy + if label == "Safety Energy": + breakdown = data.get("type_breakdown", {}) + if breakdown: + br_rows = [[k, str(v)] for k, v in sorted(breakdown.items())] + _add_table(doc, ["Activity Type", "Count"], br_rows, [3.0, 3.5]) + _spacer(doc) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 3. Events Analysis +# ───────────────────────────────────────────────────────────────────────────── + +def _section_events(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "3. 
Events Analysis", 1) + ev = results.events_summary + chts = results.charts + + _callout( + doc, + "Event Story", + "This section focuses on where event burden is building, where serious outcomes are concentrated, and what special risk signals are visible in timing and motor vehicle data.", + fill="f7fbff", + accent=TEAL, + ) + _spacer(doc) + + # Monthly trend chart + _heading(doc, "3.1 Monthly Trend", 2) + _add_chart(doc, chts, "events_monthly", 6.0) + _spacer(doc) + + # Consequence chart + _heading(doc, "3.2 Consequence Severity", 2) + _add_chart(doc, chts, "consequence", 5.5) + _spacer(doc) + + # Event type table + _heading(doc, "3.3 Event Type Breakdown", 2) + event_types = ev.get("event_type_counts", {}) + total_events = max(ev.get("total", 1), 1) + et_rows = [[t, str(c), f"{c/total_events*100:.1f}%"] for t, c in event_types.items()] + _add_table(doc, ["Event Type", "Count", "%"], et_rows, [3.0, 1.2, 1.0]) + _spacer(doc) + + # CRP table + crp = ev.get("crp_counts", {}) + if crp: + _heading(doc, "3.4 Critical Risk Protocols Involved", 2) + crp_rows = [[c, str(v)] for c, v in crp.items()] + _add_table(doc, ["CRP", "Count"], crp_rows, [4.0, 1.5]) + _spacer(doc) + + # Root cause table + rc = ev.get("root_cause_counts", {}) + if rc: + _heading(doc, "3.5 Root Cause Categories", 2) + rc_rows = [[r, str(v)] for r, v in rc.items()] + _add_table(doc, ["Root Cause", "Count"], rc_rows, [4.0, 1.5]) + _spacer(doc) + + serious_projects = ev.get("serious_projects", {}) + serious_locations = ev.get("serious_locations", {}) + if serious_projects or serious_locations: + _heading(doc, "3.6 Serious Event Hotspots", 2) + _para(doc, + "These are the projects and locations with the highest counts of moderate, major, or substantial events in the analysis window.", + size_pt=11, colour=NAVY) + _add_chart(doc, chts, "serious_hotspots", 5.8) + _spacer(doc) + if serious_projects: + rows = [[k, str(v)] for k, v in serious_projects.items()] + _add_table(doc, ["Project", "Serious Events"], 
rows, [4.0, 1.5]) + _spacer(doc) + if serious_locations: + rows = [[k, str(v)] for k, v in serious_locations.items()] + _add_table(doc, ["Location", "Serious Events"], rows, [4.0, 1.5]) + _spacer(doc) + + time_buckets = ev.get("serious_time_buckets", {}) + if time_buckets: + _heading(doc, "3.7 Serious Event Timing", 2) + _para(doc, + f"Time-of-day information was available for {ev.get('serious_time_coverage_pct', 0):.1f}% of serious events.", + size_pt=11, colour=NAVY) + rows = [[bucket, str(count)] for bucket, count in time_buckets.items()] + _add_table(doc, ["Time of Day", "Serious Events"], rows, [3.5, 1.5]) + _spacer(doc) + + motor = ev.get("motor_vehicle", {}) + if motor.get("count", 0) > 0: + _heading(doc, "3.8 Motor Vehicle Events", 2) + mv_rows = [ + ["Total Motor Vehicle Events", str(motor.get("count", 0))], + ["Share of All Events", f"{motor.get('pct_total', 0):.1f}%"], + ["Moderate+ MV Events", str(motor.get("serious_count", 0))], + ["Serious Rate Within MV Events", f"{motor.get('serious_pct_within_mve', 0):.1f}%"], + ] + _add_table(doc, ["Metric", "Value"], mv_rows, [3.5, 2.0]) + _spacer(doc) + if motor.get("top_projects"): + rows = [[k, str(v)] for k, v in motor.get("top_projects", {}).items()] + _add_table(doc, ["Project", "MV Events"], rows, [4.0, 1.5]) + _spacer(doc) + rows = [] + for label, values in [ + ("Road Type", motor.get("road_types", {})), + ("Road Condition", motor.get("conditions", {})), + ("Vehicle Type", motor.get("vehicle_types", {})), + ]: + for key, val in list(values.items())[:4]: + rows.append([label, key, str(val)]) + if rows: + _add_table(doc, ["Category", "Pattern", "Count"], rows, [1.5, 3.0, 1.0]) + _spacer(doc) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 4. Leading Activity Overview +# ───────────────────────────────────────────────────────────────────────────── + +def _section_leading_overview(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "4. 
Safety Energy — Leading Activity Overview", 1) + lead = results.leading_summary + trends = results.trends + chts = results.charts + + _callout( + doc, + "Leading Activity Readout", + "Safety Energy combines LLC, CCC, and OCC activity. The deeper two-year review is designed to show not just volume, " + "but whether the records look rich, preventive, and useful for leadership learning.", + fill="eef6fb", + accent=TEAL, + ) + _spacer(doc) + + # Activity mix donut + _heading(doc, "4.1 Activity Type Mix", 2) + _add_chart(doc, chts, "activity_mix", 4.5) + _spacer(doc) + + # Totals table + totals = lead.get("totals", {}) + avg_at_risk = lead.get("avg_at_risk", {}) + if totals: + rows = [ + [atype, + str(totals.get(atype, 0)), + f"{avg_at_risk.get(atype, 0):.2f}"] + for atype in ["LLC", "CCC", "OCC"] + if totals.get(atype, 0) > 0 + ] + _add_table( + doc, + ["Activity Type", "Total Count", "Avg At-Risk Aspects per Activity"], + rows, + [2.5, 1.5, 3.0], + ) + _spacer(doc) + + # Monthly trend chart + _heading(doc, "4.2 Monthly Activity Trend", 2) + _para(doc, f"Overall trend: {lead.get('activity_trend', 'N/A')}", size_pt=11, colour=GREY) + _add_chart(doc, chts, "leading_monthly", 6.0) + _spacer(doc) + + # BU breakdown + _heading(doc, "4.3 Activity by Business Unit", 2) + bu_by_type = lead.get("bu_by_type", {}) + all_bus: list[str] = sorted(set( + bu for d in bu_by_type.values() for bu in d.keys() + )) + if all_bus: + rows = [ + [bu] + [str(bu_by_type.get(at, {}).get(bu, 0)) for at in ["LLC", "CCC", "OCC"]] + for bu in all_bus + ] + _add_table(doc, + ["Business Unit", "LLC", "CCC", "OCC"], + rows, + [2.8, 1.0, 1.0, 1.0]) + _spacer(doc) + + # LLC topic breakdown (from LLC_Data) + _heading(doc, "4.4 LLC Conversation Topics", 2) + _para(doc, + "The following topics were most frequently recorded in Leader Learning Conversations. 
" + "Topic coverage indicates where leaders are directing their field conversations.", + size_pt=11, colour=NAVY) + _add_chart(doc, chts, "llc_topics", 5.5) + _spacer(doc) + + top_topics = lead.get("top_topics", {}) + if top_topics: + rows = [[k, str(v)] for k, v in list(top_topics.items())[:12]] + _add_table(doc, ["Topic", "Count"], rows, [4.0, 1.5]) + _spacer(doc) + + # CRP focus + _heading(doc, "4.5 CRP Focus Areas in LLCs", 2) + _add_chart(doc, chts, "crp_focus", 5.5) + _spacer(doc) + + # Top leaders + _heading(doc, "4.6 Most Active Leaders (LLC)", 2) + _add_chart(doc, chts, "top_leaders", 5.5) + _spacer(doc) + + top_leaders = lead.get("top_leaders", {}) + if top_leaders: + rows = [[l, str(c)] for l, c in list(top_leaders.items())[:12]] + _add_table(doc, ["Leader", "LLC Count"], rows, [4.0, 1.5]) + _spacer(doc) + + # Two-year trend and quality view + _heading(doc, "4.7 Rolling Two-Year Trend and Quality View", 2) + _para(doc, + f"This view uses a rolling two-year Safety Energy window from " + f"{trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}. 
" + "It is designed to answer not just whether activities were completed, but whether " + "the records suggest meaningful learning, risk recognition, and follow-up quality.", + size_pt=11, colour=NAVY) + _para(doc, trends.get("proxy_note", ""), size_pt=10, colour=GREY) + _add_chart(doc, chts, "quality_trend", 6.0) + _spacer(doc) + _add_chart(doc, chts, "quality_mix", 6.0) + _spacer(doc) + + quality_rows = trends.get("quality_by_type", []) + if quality_rows: + rows = [ + [ + r.get("activity_type", ""), + str(r.get("count", 0)), + f"{r.get('avg_quality', 0):.1f}", + f"{r.get('avg_input_depth', 0):.1f}", + f"{r.get('meaningful_pct', 0):.1f}%", + f"{r.get('high_value_pct', 0):.1f}%", + f"{r.get('shallow_pct', 0):.1f}%", + f"{r.get('follow_up_pct', 0):.1f}%", + ] + for r in quality_rows + ] + _add_table( + doc, + ["Type", "Count", "Avg Quality", "Avg Input Depth", "Meaningful", "High Value", "Shallow", "Follow-up"], + rows, + [0.8, 0.8, 0.9, 1.0, 0.9, 0.9, 0.9, 0.9], + ) + _spacer(doc) + + input_depth = trends.get("input_depth", {}) + if input_depth.get("correlation") is not None: + _heading(doc, "4.8 Input Depth as a Supporting Quality Metric", 2) + _para(doc, + f"Across the two-year Safety Energy window, input depth and quality score are correlated at r = {input_depth.get('correlation'):.2f}. 
" + f"{input_depth.get('note', '')}", + size_pt=11, colour=NAVY) + by_band = input_depth.get("by_band", []) + if by_band: + rows = [ + [ + r.get("band", ""), + str(r.get("count", 0)), + f"{r.get('avg_input_depth', 0):.1f}", + f"{r.get('avg_quality', 0):.1f}", + f"{r.get('meaningful_pct', 0):.1f}%", + f"{r.get('high_value_pct', 0):.1f}%", + f"{r.get('shallow_pct', 0):.1f}%", + ] + for r in by_band + ] + _add_table( + doc, + ["Band", "Count", "Avg Input Depth", "Avg Quality", "Meaningful", "High Value", "Shallow"], + rows, + [0.9, 0.8, 1.0, 0.9, 0.9, 0.9, 0.9], + ) + _spacer(doc) + + top_themes = trends.get("top_themes", {}) + if top_themes: + _para(doc, "Most common recurring themes in Safety Energy narratives:", bold=True, size_pt=11, colour=NAVY) + for theme, count in list(top_themes.items())[:5]: + _bullet(doc, f"{theme}: {count} mentions") + _spacer(doc) + + _heading(doc, "4.9 CCC / OCCC / LLC Value Signals", 2) + activity_insights = trends.get("activity_insights", {}) + for atype in ["CCC", "OCC", "LLC"]: + insight = activity_insights.get(atype, {}) + if not insight: + continue + _para( + doc, + f"{atype}: average quality {insight.get('avg_quality', 0):.1f}/100, " + f"average input depth {insight.get('avg_input_depth', 0):.1f}/100, " + f"{insight.get('preventive_pct', 0):.1f}% preventive signal, " + f"{insight.get('reactive_pct', 0):.1f}% reactive signal, " + f"{insight.get('repetitive_pct', 0):.1f}% repetitive signal, " + f"{insight.get('shallow_pct', 0):.1f}% shallow.", + size_pt=11, + colour=NAVY, + ) + depth = insight.get("input_depth", {}) + if depth.get("correlation") is not None: + _para(doc, + f"For {atype}, input depth vs quality correlation is r = {depth.get('correlation'):.2f}.", + size_pt=10, colour=GREY) + top_modules = insight.get("top_modules", {}) + if top_modules: + _para(doc, f"Top {atype} focus areas:", bold=True, size_pt=10, colour=GREY) + for label, count in list(top_modules.items())[:4]: + _bullet(doc, f"{label}: {count}") + _spacer(doc) + 
+ _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 5. Effectiveness of Leading Activities +# ───────────────────────────────────────────────────────────────────────────── + +def _section_effectiveness(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "5. Effectiveness of Leading Activities", 1) + eff = results.effectiveness + chts = results.charts + + _para(doc, + "This section examines whether leading activity patterns appear associated with " + "event outcomes at a business unit and portfolio level. All findings are associative " + "only — correlation does not imply causation.", + size_pt=11, colour=NAVY) + _spacer(doc) + + # Monthly overlay chart + _heading(doc, "5.1 Monthly Activities vs Events Overlay", 2) + _add_chart(doc, chts, "overlay", 6.0) + _spacer(doc) + + # Correlation note + _heading(doc, "5.2 Statistical Association", 2) + _para(doc, eff.get("corr_note", "N/A"), size_pt=11, colour=NAVY) + _spacer(doc) + + # BU comparison chart + _heading(doc, "5.3 Activities vs Events by Business Unit", 2) + _add_chart(doc, chts, "bu_comparison", 5.5) + _spacer(doc) + + # BU table + bu_table = eff.get("bu_table", []) + if bu_table: + rows = [ + [r.get("business_unit", ""), str(r.get("activities", 0)), + str(r.get("events", 0))] + for r in bu_table + ] + _add_table(doc, + ["Business Unit", "Leading Activities", "Events"], + rows, + [3.0, 2.0, 1.5]) + _spacer(doc) + + # Pattern commentary + high_both = eff.get("high_activity_high_events", []) + high_acts = eff.get("high_activity_low_events", []) + + if high_both: + _heading(doc, "5.4 High Activity / High Events — Pattern of Interest", 2) + _para(doc, + "The following Business Units recorded both above-median leading-activity volumes " + "and above-median event counts. This may indicate reactive activity patterns where " + "engagement is increasing in response to events rather than preventing them. 
" + "Further investigation is recommended.", + size_pt=11, colour=NAVY) + for bu in high_both: + _bullet(doc, bu) + _spacer(doc) + + if high_acts: + _heading(doc, "5.5 High Activity / Low Events — Positive Signal", 2) + _para(doc, + "The following Business Units recorded above-median leading-activity volumes " + "and below-median event counts. This pattern is consistent with leading activities " + "having a preventive effect, though this cannot be confirmed from available data.", + size_pt=11, colour=NAVY) + for bu in high_acts: + _bullet(doc, bu) + _spacer(doc) + + _para(doc, eff.get("note", ""), size_pt=10, colour=GREY) + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 6. At-Risk Behaviours +# ───────────────────────────────────────────────────────────────────────────── + +def _section_at_risk(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "6. At-Risk Behaviours", 1) + ar = results.at_risk + chts = results.charts + + _para(doc, + "At-risk behaviour themes are identified by analysing free-text fields across all " + "three data sources (Events descriptions, LLC conversation notes, and Safety Energy " + "observations) using keyword matching against known risk categories.", + size_pt=11, colour=NAVY) + _spacer(doc) + + _heading(doc, "6.1 Combined Theme Frequency", 2) + _add_chart(doc, chts, "at_risk_themes", 5.5) + _spacer(doc) + + combined = ar.get("combined_themes", {}) + if combined: + rows = [[k, str(v)] for k, v in combined.items()] + _add_table( + doc, + ["Risk Theme", "Weighted Frequency"], + rows, + [3.5, 2.0], + ) + _spacer(doc) + + # LLC theme focus vs event themes + _heading(doc, "6.2 LLC Conversation Topics vs Event Themes", 2) + llc_themes = ar.get("llc_themes", {}) + ev_themes = ar.get("event_themes", {}) + + if llc_themes or ev_themes: + all_themes = sorted( + set(list(llc_themes.keys()) + list(ev_themes.keys())) + ) + rows = [ + [t, str(llc_themes.get(t, 0)), 
str(ev_themes.get(t, 0))] + for t in all_themes + ] + _add_table(doc, + ["Risk Theme", "LLC Mentions", "Event Mentions"], + rows, + [3.0, 1.5, 1.5]) + _spacer(doc) + + # Alignment gap + gap = ar.get("gap_themes", []) + if gap: + _heading(doc, "6.3 Topic Alignment Gaps", 2) + _para(doc, + "The following risk themes appear among the top event themes but are under-represented " + "in LLC conversation topics. This may indicate a gap between where safety conversations " + "are focused and where actual events are occurring.", + size_pt=11, colour=NAVY) + for theme in gap: + _bullet(doc, theme) + _spacer(doc) + + _para(doc, ar.get("note", ""), size_pt=10, colour=GREY) + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 7. Safety Energy ↔ Events Relationship +# ───────────────────────────────────────────────────────────────────────────── + +def _section_se_events(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "7. Relationship Between Safety Energy and Events", 1) + rel = results.se_events_rel + chts = results.charts + + _callout( + doc, + "Coverage vs Outcome", + "This section links leading activity coverage to event burden so leaders can see which business units, projects, and locations appear strongest, and which need direct intervention.", + fill="f7fbff", + accent=TEAL, + ) + _spacer(doc) + + # BU comparison table + _heading(doc, "7.1 Business Unit Comparison", 2) + bu_comp = rel.get("bu_comparison", []) + if bu_comp: + rows = [ + [str(r.get("business_unit", "")), + str(int(r.get("activities", 0))), + str(int(r.get("events", 0))), + str(r.get("ratio", "—") if r.get("ratio", "—") is not None else "—")] + for r in bu_comp + ] + _add_table(doc, + ["Business Unit", "Activities", "Events", "Activity:Event Ratio"], + rows, + [2.5, 1.2, 1.0, 2.0]) + _spacer(doc) + + project_comp = rel.get("project_comparison", {}) + if project_comp.get("best") or project_comp.get("watch"): + _heading(doc, "7.2 Project 
Performance Signals", 2) + _para(doc, + "These project comparisons use recorded Safety Energy activity against recorded events. They are intended as directional signals only, not league tables.", + size_pt=11, colour=NAVY) + _add_chart(doc, chts, "project_quadrant", 5.8) + _spacer(doc) + if project_comp.get("best"): + rows = [ + [str(r.get("project", "")), str(r.get("activities", 0)), str(r.get("events", 0)), + str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] + for r in project_comp.get("best", []) + ] + _add_table(doc, ["Project", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) + _spacer(doc) + if project_comp.get("watch"): + _para(doc, "Projects needing leadership attention:", bold=True, size_pt=11, colour=NAVY) + rows = [ + [str(r.get("project", "")), str(r.get("activities", 0)), str(r.get("events", 0)), + str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] + for r in project_comp.get("watch", [])[:6] + ] + _add_table(doc, ["Project", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) + _spacer(doc) + + location_comp = rel.get("location_comparison", {}) + if location_comp.get("best") or location_comp.get("watch"): + _heading(doc, "7.3 Location Performance Signals", 2) + if location_comp.get("best"): + rows = [ + [str(r.get("location", "")), str(r.get("activities", 0)), str(r.get("events", 0)), + str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] + for r in location_comp.get("best", []) + ] + _add_table(doc, ["Location", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) + _spacer(doc) + if location_comp.get("watch"): + _para(doc, "Locations needing leadership attention:", bold=True, size_pt=11, colour=NAVY) + 
rows = [ + [str(r.get("location", "")), str(r.get("activities", 0)), str(r.get("events", 0)), + str(r.get("serious_events", 0)), str(r.get("activity_event_ratio", "—") if r.get("activity_event_ratio", "—") is not None else "—")] + for r in location_comp.get("watch", [])[:6] + ] + _add_table(doc, ["Location", "Activities", "Events", "Serious", "Activity:Event"], rows, [2.8, 1.0, 0.9, 0.9, 1.3]) + _spacer(doc) + + # Spike months + spikes = rel.get("spike_months", []) + if spikes: + _heading(doc, "7.4 Event Spike Periods", 2) + _para(doc, + "The following periods recorded above-average event counts coinciding with " + "below-average leading-activity volumes. These periods may warrant retrospective " + "review.", + size_pt=11, colour=NAVY) + for m in spikes: + _bullet(doc, m) + _spacer(doc) + + # Topic alignment note + _heading(doc, "7.5 Topic Alignment Observation", 2) + _para(doc, rel.get("alignment_note", ""), size_pt=11, colour=NAVY) + + llc_top = rel.get("llc_top_topics", []) + ev_top = rel.get("ev_top_rc", []) + if llc_top: + _para(doc, "Top LLC topics:", bold=True, size_pt=11, colour=NAVY) + for t in llc_top: + _bullet(doc, str(t)) + if ev_top: + _para(doc, "Top event root causes:", bold=True, size_pt=11, colour=NAVY) + for t in ev_top: + _bullet(doc, str(t)) + _spacer(doc) + + _para(doc, rel.get("note", ""), size_pt=10, colour=GREY) + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 8. Leader Focus Areas +# ───────────────────────────────────────────────────────────────────────────── + +def _section_focus_areas(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "8. 
Leader Focus Areas", 1) + fa = results.focus_areas + trends = results.trends + chts = results.charts + + _para(doc, + "This section identifies Business Units and leaders that warrant specific leadership " + "attention based on activity volumes, event rates, and observed trends.", + size_pt=11, colour=NAVY) + _spacer(doc) + + # BU summary table + _heading(doc, "8.1 Business Unit Activity and Event Summary", 2) + bu_summary = fa.get("bu_summary", []) + if bu_summary: + rows = [ + [str(r.get("business_unit", "")), + str(r.get("activities", 0)), + str(r.get("events", 0))] + for r in bu_summary + ] + _add_table(doc, + ["Business Unit", "Leading Activities", "Events"], + rows, + [3.0, 2.0, 1.5]) + _spacer(doc) + + # Declining BUs + declining = fa.get("declining_bus", []) + if declining: + _heading(doc, "8.2 Declining Activity Units", 2) + _para(doc, + "The following Business Units recorded significantly lower leading-activity " + "volumes in the second half of the analysis period compared to the first half. 
" + "Leaders in these units should be engaged to understand and address the decline.", + size_pt=11, colour=NAVY) + for bu in declining: + _bullet(doc, bu) + _spacer(doc) + + leadership_focus = trends.get("leadership_focus", []) + if leadership_focus: + _heading(doc, "8.3 Leadership Watchouts from Two-Year Safety Energy Trends", 2) + for item in leadership_focus[:5]: + _bullet(doc, item) + _spacer(doc) + + low_value_units = trends.get("high_volume_low_value", []) + if low_value_units: + _heading(doc, "8.4 High-Volume / Low-Value Hotspots", 2) + _para(doc, + "These areas are recording substantial activity volume, but the record quality signals " + "suggest the activity may be drifting toward compliance-only completion rather than strong learning.", + size_pt=11, colour=NAVY) + _add_chart(doc, chts, "low_value_units", 5.8) + _spacer(doc) + rows = [ + [ + r.get("activity_type", ""), + str(r.get("business_unit", "")), + str(r.get("count", 0)), + f"{r.get('avg_quality', 0):.1f}", + f"{r.get('shallow_pct', 0):.1f}%", + ] + for r in low_value_units[:8] + ] + _add_table(doc, + ["Type", "Business Unit", "Count", "Avg Quality", "Shallow"], + rows, + [0.9, 2.8, 0.9, 1.0, 1.0]) + _spacer(doc) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 9. Recommended Actions +# ───────────────────────────────────────────────────────────────────────────── + +def _section_recommendations(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "9. 
Recommended Actions", 1) + _callout( + doc, + "Action Agenda", + "These actions are generated directly from the event hotspots, leading-activity quality signals, and project/location performance patterns in the report.", + fill="eef6fb", + accent=TEAL, + ) + _spacer(doc) + + for i, rec in enumerate(results.recommendations[:10], 1): + priority = "Immediate" if i <= 3 else "Next" + _callout(doc, f"{priority} Priority", rec, fill="f7fbff" if i <= 3 else "ffffff", accent=ALERT if i <= 3 else NAVY) + _spacer(doc) + + _spacer(doc) + _heading(doc, "Ongoing Monitoring Recommendations", 2) + for item in [ + "Track leading-activity volumes monthly by Business Unit against a set target (e.g. " + "minimum 4 LLCs per leader per month).", + "Track CCC/OCC/LLC quality monthly using shallow-entry rate, follow-up rate, and average quality score.", + "Review LLC topic coverage quarterly to ensure alignment with top event root causes.", + "Re-run this full report monthly or quarterly as new data becomes available.", + "Supplement quantitative analysis with qualitative review of LLC content quality.", + "Use the Business Unit activity-to-event ratio table to guide where SHEQ advisor " + "engagement should be prioritised.", + ]: + _bullet(doc, item) + + _page_break(doc) + + +# ───────────────────────────────────────────────────────────────────────────── +# 10. Methodology and Caveats +# ───────────────────────────────────────────────────────────────────────────── + +def _section_methodology(doc: Document, results: AnalysisResults) -> None: + _heading(doc, "10. Methodology and Caveats", 1) + trends = results.trends + + _heading(doc, "10.1 Data Sources", 2) + rows = [ + ["Events.xlsx", + "Incident and event records exported from the Ventia safety management system. 
" + "Covers all event types including injuries, motor vehicle events, close calls, " + "environmental events, and nonconformances."], + ["Safety_Energy.xlsx", + "Combined leading activity export covering all three activity types: Leader Learning " + "Conversations (LLC), Critical Control Checks (CCC), and Operational Control Checks (OCC). " + "This is treated as the primary leading indicator data source."], + ["LLC_Data.xlsx", + "Supplementary LLC export providing richer free-text data (conversation topics, CRP focus, " + "at-risk observations). Used primarily for theme and topic analysis. Record counts are " + "closely aligned with the LLC records in Safety_Energy.xlsx."], + ] + _add_table(doc, ["Source", "Description"], rows, [2.0, 4.5]) + _spacer(doc) + + _heading(doc, "10.2 Activity Type Definitions", 2) + _para(doc, + "Safety Energy is the combined analytical domain. It encompasses three activity types:", + size_pt=11, colour=NAVY) + for item in [ + "LLC (Leader Learning Conversation): A structured conversation between a leader and " + "a worker or work group, focused on safety topics, risk identification, and critical " + "controls.", + "CCC (Critical Control Check): A field verification that critical controls for high-risk " + "activities are in place and effective (e.g. working at height, hazardous energies).", + "OCC (Operational Control Check): A broader operational inspection or check covering " + "a range of work-area risk topics.", + "Note: In some legacy documentation or older exports, the label 'OCC' was used broadly " + "to cover what is now split into CCC and OCC. The current Safety_Energy.xlsx export " + "correctly separates these via the ModuleType field. No manual deduplication was required.", + ]: + _bullet(doc, item) + _spacer(doc) + + _heading(doc, "10.3 Analytical Approach", 2) + for item in [ + "Monthly trend analysis: Activities and events are aggregated by calendar month. 
" + "Trend direction is estimated by comparing recent-period averages against prior-period averages.", + f"Rolling two-year Safety Energy review: deeper trend and quality analysis uses a {trends.get('window_months', 24)}-month " + f"window from {trends.get('window_start', 'N/A')} to {trends.get('window_end', 'N/A')}, anchored to the latest Safety Energy record.", + "Effectiveness analysis: Business unit-level aggregates and overall monthly correlations " + "are used as proxies for effectiveness. Correlation is computed using Pearson r.", + "At-risk theme extraction: Free-text fields are scanned using a predefined keyword " + "dictionary (see config.py). Frequency counts are combined across sources with a " + "2× weight applied to event-source mentions (lagging signal).", + "Business Unit focus: BUs are flagged as 'declining' if second-half activity volume " + "is less than 70% of first-half volume within the analysis window.", + "Leading-activity quality scoring: records are scored using practical proxies including text richness, specificity, " + "risk recognition, action/follow-up language, learning evidence, and penalties for generic or duplicated wording.", + ]: + _bullet(doc, item) + _spacer(doc) + + _heading(doc, "10.4 Caveats and Limitations", 2) + for cav in results.caveats: + _bullet(doc, cav) + + _spacer(doc) + _para(doc, + f"Report generated: {datetime.now().strftime('%d %B %Y at %H:%M')}", + size_pt=9, colour=GREY) + + +# ───────────────────────────────────────────────────────────────────────────── +# Main entry point +# ───────────────────────────────────────────────────────────────────────────── + +def build_report(results: AnalysisResults, output_dir: str) -> str: + """ + Build the full SHEQ DOCX report from an AnalysisResults object. + + Parameters + ---------- + results : output of analysis_engine.run_full_analysis + output_dir : directory to write the .docx file into + + Returns + ------- + Absolute path to the generated .docx file. 
+ """ + os.makedirs(output_dir, exist_ok=True) + doc = Document() + _bootstrap_styles(doc) + + log.info("Building DOCX report...") + + _title_page(doc, results) + _section_executive_summary(doc, results) + _section_data_quality(doc, results) + _section_events(doc, results) + _section_leading_overview(doc, results) + _section_effectiveness(doc, results) + _section_at_risk(doc, results) + _section_se_events(doc, results) + _section_focus_areas(doc, results) + _section_recommendations(doc, results) + _section_methodology(doc, results) + + output_path = os.path.join( + output_dir, + f"SHEQ_Safety_Performance_{datetime.now().strftime('%Y%m%d_%H%M')}.docx", + ) + doc.save(output_path) + log.info("Report saved to %s", output_path) + return output_path diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c910ded --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pandas>=2.0 +openpyxl>=3.1 +matplotlib>=3.7 +python-docx>=1.1 +flask>=3.0 +python-pptx>=1.0 diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..9fc11c5 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,502 @@ + + + + + + SHEQanalator alpha + + + + + + + +
+

SHEQanalator

+ {{ total_events }} events loaded • Safety Energy & LLC analysis enabled +
+ +
+ + + + +
+
+ + + +

Click “Apply Filters” to explore Events data

+

Use “Download Report” to generate the executive board-pack DOCX

+
+
+
+ + +
+ + + +