"""
app.py — SHEQ Analysis Tool — Flask web application.
Run:
python app.py
Then open http://localhost:5000
The sidebar has two sections:
1. Events Explorer — filter and chart Events data interactively.
2. Generate Report — run the full analysis across Events, Safety Energy,
and LLC Data and download a comprehensive DOCX report.
"""
from __future__ import annotations
import logging
import os
from datetime import datetime
import pandas as pd
from flask import Flask, jsonify, render_template, request, send_file
from config import (
EVENTS_FILE, LLC_FILE, SAFETY_ENERGY_FILE,
DEFAULT_PD1_NAME, DEFAULT_PD2_NAME,
DEFAULT_START_DATE, DEFAULT_SPLIT_DATE,
OUTPUT_DIR,
)
from data_loader import load_all, get_body_parts, load_and_prepare
from analysis_engine import run_full_analysis
from report_builder import build_report
from ppt_builder import build_presentation
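# Module layout: data_loader reads the source data files, analysis_engine runs
# the full analysis, and report_builder / ppt_builder render the results as the
# DOCX / PPTX downloads; this module only wires those pieces to Flask routes.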
# ── Logging ──────────────────────────────────────────────────────────────────
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
datefmt="%H:%M:%S",
)
log = logging.getLogger("app")
app = Flask(__name__)
# ── Cached raw DataFrames (loaded on first request) ──────────────────────────
_CACHE: dict[str, pd.DataFrame | None] = {
"events": None,
}
def _get_events_df() -> pd.DataFrame:
"""Return the raw Events DataFrame, loading from disk on first call."""
if _CACHE["events"] is None:
log.info("Loading Events from %s", EVENTS_FILE)
df = pd.read_excel(EVENTS_FILE)
# Normalise date column — handle "Monday, 25 March 2024" and ISO formats
date_col = "EventDate" if "EventDate" in df.columns else "Event Date"
df["_date"] = df[date_col].apply(_parse_one_date)
_CACHE["events"] = df
return _CACHE["events"].copy()
def _parse_one_date(val) -> pd.Timestamp:
    """Parse a single raw date value, returning NaT for anything unparseable."""
    if pd.isna(val):
        return pd.NaT
    s = str(val).strip()
    # Strip a leading day name ("Monday, 25 March 2024" -> "25 March 2024")
    # so pandas only has to parse the date itself.
    if "," in s and len(s.split(",")[0].split()) == 1:
        s = s.split(",", 1)[1].strip()
    try:
        return pd.to_datetime(s, dayfirst=True)
    except Exception:
        return pd.NaT
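# Illustrative conversions (values are examples only):
#   "Monday, 25 March 2024" -> Timestamp("2024-03-25")
#   "2024-03-25"            -> Timestamp("2024-03-25")
#   "02/03/2024"            -> Timestamp("2024-03-02")  (dayfirst=True)
#   NaN / unparseable text  -> NaT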
# ─────────────────────────────────────────────────────────────────────────────
# Web UI
# ─────────────────────────────────────────────────────────────────────────────
@app.route("/")
def index():
df = _get_events_df()
min_date = df["_date"].min().strftime("%Y-%m-%d")
max_date = df["_date"].max().strftime("%Y-%m-%d")
# Handle both column name variants
evt_col = "EventType" if "EventType" in df.columns else "Event Type"
cons_col = "Actual Consequence"
event_types = sorted(df[evt_col].dropna().unique().tolist())
consequences = sorted(df[cons_col].dropna().unique().tolist())
return render_template(
"index.html",
min_date=min_date,
max_date=max_date,
event_types=event_types,
consequences=consequences,
total_events=len(df),
)
# ─────────────────────────────────────────────────────────────────────────────
# Events Explorer API
# ─────────────────────────────────────────────────────────────────────────────
@app.route("/api/filter", methods=["POST"])
def api_filter():
"""Return filtered summary stats as JSON for the Events Explorer."""
    params = request.get_json(silent=True) or {}
df = _get_events_df()
evt_col = "EventType" if "EventType" in df.columns else "Event Type"
cons_col = "Actual Consequence"
crp_col = "CRP Involved" if "CRP Involved" in df.columns else "CRPInvolved"
rc_col = "Root Cause Category"
inj_col = "Ventia Injury Classification"
bp_col = "Bodily Location"
# Filters
if params.get("start_date"):
df = df[df["_date"] >= pd.Timestamp(params["start_date"])]
if params.get("end_date"):
df = df[df["_date"] <= pd.Timestamp(params["end_date"])]
if params.get("event_types"):
df = df[df[evt_col].isin(params["event_types"])]
if params.get("consequences"):
df = df[df[cons_col].isin(params["consequences"])]
if len(df) == 0:
return jsonify({"error": "No events match the selected filters.", "total": 0})
# Summary stats
evt_counts = df[evt_col].value_counts().to_dict()
cons_counts = df[cons_col].value_counts().to_dict()
inj_class = (
df[inj_col].value_counts().to_dict()
if inj_col in df.columns else {}
)
dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
dow = (
df["_date"].dt.day_name().value_counts()
.reindex(dow_order, fill_value=0).to_dict()
)
monthly = df.groupby(df["_date"].dt.to_period("M")).size()
monthly_data = {str(k): int(v) for k, v in monthly.items()}
bp = (
get_body_parts(df[bp_col]).value_counts().head(10).to_dict()
if bp_col in df.columns else {}
)
rc = (
df[rc_col].value_counts().to_dict()
if rc_col in df.columns else {}
)
crp: dict = {}
if crp_col in df.columns:
crp = df[crp_col].value_counts().to_dict()
crp.pop("None Identified", None)
crp.pop("Under Investigation", None)
# Investigation performance — use available columns
lag_col = next((c for c in ("Days to Investigate", "Event Lag", "Days to Enter")
if c in df.columns), None)
close_col = "Days to Close" if "Days to Close" in df.columns else None
inv_med = df[lag_col].dropna().median() if lag_col else None
close_med = df[close_col].dropna().median() if close_col else None
return jsonify({
"total": len(df),
"date_range": (
f"{df['_date'].min().strftime('%d %b %Y')} "
f"\u2013 {df['_date'].max().strftime('%d %b %Y')}"
),
"event_types": evt_counts,
"consequences": cons_counts,
"injury_classification": inj_class,
"day_of_week": dow,
"monthly": monthly_data,
"body_parts": bp,
"root_causes": rc,
"crp": crp,
"median_investigate_days": round(inv_med, 1) if inv_med and pd.notna(inv_med) else None,
"median_close_days": round(close_med, 1) if close_med and pd.notna(close_med) else None,
"closed_pct": round(
(df["Status"] == "Closed").sum() / len(df) * 100, 1
) if "Status" in df.columns else None,
})
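# Illustrative request body for /api/filter (all keys optional; values are
# examples only):
#   {"start_date": "2024-01-01", "end_date": "2024-06-30",
#    "event_types": ["Injury"], "consequences": ["First Aid"]}
# The JSON response carries the keys assembled above: total, date_range,
# event_types, consequences, injury_classification, day_of_week, monthly,
# body_parts, root_causes, crp, median_investigate_days, median_close_days
# and closed_pct.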
# ─────────────────────────────────────────────────────────────────────────────
# Comprehensive Report API
# ─────────────────────────────────────────────────────────────────────────────
@app.route("/api/generate_full_report", methods=["POST"])
def api_generate_full_report():
"""
Load all three data sources, run the full analysis pipeline, build the
DOCX report, and return it as a file download.
"""
    params = request.get_json(silent=True) or {}
start_date = params.get("start_date", DEFAULT_START_DATE)
export_format = str(params.get("export_format", "docx")).lower()
if export_format not in {"docx", "pptx"}:
return jsonify({"success": False, "error": "Supported export formats are DOCX and PPTX."}), 400
events_path = params.get("events_file", EVENTS_FILE)
se_path = params.get("safety_energy_file", SAFETY_ENERGY_FILE)
llc_path = params.get("llc_file", LLC_FILE)
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = os.path.join(OUTPUT_DIR, run_id)
try:
log.info("Starting full report generation (run_id=%s)", run_id)
data = load_all(events_path, se_path, llc_path)
        results = run_full_analysis(
            events=data["events"],
            safety_energy=data["safety_energy"],
            llc=data["llc"],
            start_date=start_date,
            split_date=DEFAULT_SPLIT_DATE,
            pd1_name=DEFAULT_PD1_NAME,
            pd2_name=DEFAULT_PD2_NAME,
            output_dir=run_dir,
        )
if export_format == "pptx":
report_path = build_presentation(results, run_dir)
download_name = f"SHEQ_Safety_Performance_{run_id}.pptx"
mimetype = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
else:
report_path = build_report(results, run_dir)
download_name = f"SHEQ_Safety_Performance_{run_id}.docx"
mimetype = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
log.info("Report ready: %s", report_path)
return send_file(
report_path,
as_attachment=True,
download_name=download_name,
mimetype=mimetype,
)
except FileNotFoundError as e:
log.error("File not found: %s", e)
return jsonify({"success": False, "error": str(e)}), 404
except Exception as e:
log.exception("Report generation failed")
return jsonify({"success": False, "error": str(e)}), 500
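# Illustrative client call (values are examples only; any HTTP client works):
#   curl -X POST http://localhost:5000/api/generate_full_report \
#        -H "Content-Type: application/json" \
#        -d '{"export_format": "pptx", "start_date": "2024-01-01"}' \
#        -o SHEQ_Safety_Performance.pptx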
# ── Legacy endpoint (kept for backwards compatibility) ────────────────────────
@app.route("/api/download_report", methods=["POST"])
def api_download_report():
"""Legacy Events-only PD comparison report (preserved from v1)."""
    params = request.get_json(silent=True) or {}
start_date = params.get("start_date", DEFAULT_START_DATE)
split_date = params.get("split_date", DEFAULT_SPLIT_DATE)
pd1_name = params.get("pd1_name", DEFAULT_PD1_NAME)
pd2_name = params.get("pd2_name", DEFAULT_PD2_NAME)
run_dir = os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))
try:
from analysis import run_analysis
docx_path = run_analysis(
EVENTS_FILE, start_date, split_date, pd1_name, pd2_name, run_dir
)
return send_file(
docx_path, as_attachment=True,
download_name="SHEQ_PD_Comparison.docx",
)
except Exception as e:
log.exception("Legacy report generation failed")
return jsonify({"success": False, "error": str(e)}), 500
# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
os.makedirs(OUTPUT_DIR, exist_ok=True)
log.info("SHEQ Analysis Tool starting on http://localhost:5000")
log.info(" Events: %s", EVENTS_FILE)
log.info(" Safety Energy: %s", SAFETY_ENERGY_FILE)
log.info(" LLC Data: %s", LLC_FILE)
app.run(debug=True, port=5000)