"""
app.py — SHEQ Analysis Tool — Flask web application.

Run:

    python app.py

Then open http://localhost:5000

The sidebar has two sections:

1. Events Explorer — filter and chart Events data interactively.
2. Generate Report — run the full analysis across Events, Safety Energy,
   and LLC Data and download a comprehensive DOCX report.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from datetime import datetime
|
|
|
|
import pandas as pd
|
|
from flask import Flask, jsonify, render_template, request, send_file
|
|
|
|
from config import (
|
|
EVENTS_FILE, LLC_FILE, SAFETY_ENERGY_FILE,
|
|
DEFAULT_PD1_NAME, DEFAULT_PD2_NAME,
|
|
DEFAULT_START_DATE, DEFAULT_SPLIT_DATE,
|
|
OUTPUT_DIR,
|
|
)
|
|
from data_loader import load_all, get_body_parts, load_and_prepare
|
|
from analysis_engine import run_full_analysis
|
|
from report_builder import build_report
|
|
from ppt_builder import build_presentation
|
|
|
|
# ── Logging ──────────────────────────────────────────────────────────────────
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)
log = logging.getLogger("app")

app = Flask(__name__)

# ── Cached raw DataFrames (loaded on first request) ──────────────────────────
# Keyed by dataset name; None means "not loaded yet".
_CACHE: dict[str, pd.DataFrame | None] = {"events": None}
|
|
|
|
|
|
def _get_events_df() -> pd.DataFrame:
    """Return the raw Events DataFrame, loading and caching it on first use.

    A copy is returned so callers can filter/mutate freely without
    corrupting the cached frame.
    """
    if _CACHE["events"] is None:
        log.info("Loading Events from %s", EVENTS_FILE)
        raw = pd.read_excel(EVENTS_FILE)
        # The export uses either "EventDate" or "Event Date"; normalise the
        # chosen column into a parsed "_date" helper column (handles values
        # such as "Monday, 25 March 2024" as well as ISO strings).
        col = "EventDate" if "EventDate" in raw.columns else "Event Date"
        raw["_date"] = raw[col].apply(_parse_one_date)
        _CACHE["events"] = raw
    return _CACHE["events"].copy()
|
|
|
|
|
|
def _parse_one_date(val) -> pd.Timestamp:
|
|
if pd.isna(val):
|
|
return pd.NaT
|
|
s = str(val).strip()
|
|
if "," in s and len(s.split(",")[0].split()) == 1:
|
|
s = s.split(",", 1)[1].strip()
|
|
try:
|
|
return pd.to_datetime(s, dayfirst=True)
|
|
except Exception:
|
|
return pd.NaT
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Web UI
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
@app.route("/")
def index():
    """Render the main page, seeding the filter widgets from the data."""
    df = _get_events_df()

    # Column names vary between export versions — probe for the variant present.
    evt_col = "EventType" if "EventType" in df.columns else "Event Type"
    cons_col = "Actual Consequence"

    return render_template(
        "index.html",
        min_date=df["_date"].min().strftime("%Y-%m-%d"),
        max_date=df["_date"].max().strftime("%Y-%m-%d"),
        event_types=sorted(df[evt_col].dropna().unique().tolist()),
        consequences=sorted(df[cons_col].dropna().unique().tolist()),
        total_events=len(df),
    )
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Events Explorer API
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
@app.route("/api/filter", methods=["POST"])
def api_filter():
    """Return filtered summary stats as JSON for the Events Explorer.

    The JSON request body may contain ``start_date``, ``end_date``,
    ``event_types`` and ``consequences``; all statistics are computed over
    whichever rows survive the filters.  Returns an ``{"error": ..., "total": 0}``
    payload when nothing matches.
    """
    params = request.json or {}
    df = _get_events_df()

    # Column names vary between export versions — probe for each variant.
    evt_col = "EventType" if "EventType" in df.columns else "Event Type"
    cons_col = "Actual Consequence"
    crp_col = "CRP Involved" if "CRP Involved" in df.columns else "CRPInvolved"
    rc_col = "Root Cause Category"
    inj_col = "Ventia Injury Classification"
    bp_col = "Bodily Location"

    # Filters — each is applied only when the client supplied a value.
    if params.get("start_date"):
        df = df[df["_date"] >= pd.Timestamp(params["start_date"])]
    if params.get("end_date"):
        df = df[df["_date"] <= pd.Timestamp(params["end_date"])]
    if params.get("event_types"):
        df = df[df[evt_col].isin(params["event_types"])]
    if params.get("consequences"):
        df = df[df[cons_col].isin(params["consequences"])]

    if len(df) == 0:
        return jsonify({"error": "No events match the selected filters.", "total": 0})

    # Summary stats
    evt_counts = df[evt_col].value_counts().to_dict()
    cons_counts = df[cons_col].value_counts().to_dict()

    inj_class = (
        df[inj_col].value_counts().to_dict()
        if inj_col in df.columns else {}
    )

    # Day-of-week counts in calendar order, zero-filled for missing days.
    dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    dow = (
        df["_date"].dt.day_name().value_counts()
        .reindex(dow_order, fill_value=0).to_dict()
    )

    monthly = df.groupby(df["_date"].dt.to_period("M")).size()
    monthly_data = {str(k): int(v) for k, v in monthly.items()}

    bp = (
        get_body_parts(df[bp_col]).value_counts().head(10).to_dict()
        if bp_col in df.columns else {}
    )

    rc = (
        df[rc_col].value_counts().to_dict()
        if rc_col in df.columns else {}
    )

    crp: dict = {}
    if crp_col in df.columns:
        crp = df[crp_col].value_counts().to_dict()
        # Non-answers are noise in the CRP breakdown — drop them.
        crp.pop("None Identified", None)
        crp.pop("Under Investigation", None)

    # Investigation performance — use whichever lag column this export has.
    lag_col = next((c for c in ("Days to Investigate", "Event Lag", "Days to Enter")
                    if c in df.columns), None)
    close_col = "Days to Close" if "Days to Close" in df.columns else None

    inv_med = df[lag_col].dropna().median() if lag_col else None
    close_med = df[close_col].dropna().median() if close_col else None

    return jsonify({
        "total": len(df),
        "date_range": (
            f"{df['_date'].min().strftime('%d %b %Y')} "
            f"\u2013 {df['_date'].max().strftime('%d %b %Y')}"
        ),
        "event_types": evt_counts,
        "consequences": cons_counts,
        "injury_classification": inj_class,
        "day_of_week": dow,
        "monthly": monthly_data,
        "body_parts": bp,
        "root_causes": rc,
        "crp": crp,
        # NOTE: compare against None explicitly — the previous truthiness test
        # (`if inv_med and ...`) wrongly reported a legitimate 0.0 median as null.
        "median_investigate_days": (
            round(inv_med, 1) if inv_med is not None and pd.notna(inv_med) else None
        ),
        "median_close_days": (
            round(close_med, 1) if close_med is not None and pd.notna(close_med) else None
        ),
        "closed_pct": round(
            (df["Status"] == "Closed").sum() / len(df) * 100, 1
        ) if "Status" in df.columns else None,
    })
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Comprehensive Report API
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
@app.route("/api/generate_full_report", methods=["POST"])
def api_generate_full_report():
    """
    Load all three data sources, run the full analysis pipeline, build the
    DOCX report, and return it as a file download.
    """
    params = request.json or {}

    start_date = params.get("start_date", DEFAULT_START_DATE)
    export_format = str(params.get("export_format", "docx")).lower()
    if export_format not in {"docx", "pptx"}:
        return jsonify({"success": False, "error": "Supported export formats are DOCX and PPTX."}), 400

    # Input file paths default to the configured locations.
    events_path = params.get("events_file", EVENTS_FILE)
    se_path = params.get("safety_energy_file", SAFETY_ENERGY_FILE)
    llc_path = params.get("llc_file", LLC_FILE)

    # Each run writes into its own timestamped output directory.
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = os.path.join(OUTPUT_DIR, run_id)

    try:
        log.info("Starting full report generation (run_id=%s)", run_id)
        data = load_all(events_path, se_path, llc_path)

        results = run_full_analysis(
            events=data["events"],
            safety_energy=data["safety_energy"],
            llc=data["llc"],
            start_date=start_date,
            split_date=DEFAULT_SPLIT_DATE,
            pd1_name=DEFAULT_PD1_NAME,
            pd2_name=DEFAULT_PD2_NAME,
            output_dir=run_dir,
        )

        # Pick the builder and response metadata for the requested format.
        if export_format == "pptx":
            report_path = build_presentation(results, run_dir)
            download_name = f"SHEQ_Safety_Performance_{run_id}.pptx"
            mimetype = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
        else:
            report_path = build_report(results, run_dir)
            download_name = f"SHEQ_Safety_Performance_{run_id}.docx"
            mimetype = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        log.info("Report ready: %s", report_path)

        return send_file(
            report_path,
            as_attachment=True,
            download_name=download_name,
            mimetype=mimetype,
        )

    except FileNotFoundError as e:
        log.error("File not found: %s", e)
        return jsonify({"success": False, "error": str(e)}), 404
    except Exception as e:
        log.exception("Report generation failed")
        return jsonify({"success": False, "error": str(e)}), 500
|
|
|
|
|
|
# ── Legacy endpoint (kept for backwards compatibility) ────────────────────────
|
|
|
|
@app.route("/api/download_report", methods=["POST"])
def api_download_report():
    """Legacy Events-only PD comparison report (preserved from v1)."""
    params = request.json or {}

    # Fall back to configured defaults for anything the client omits.
    start_date, split_date, pd1_name, pd2_name = (
        params.get("start_date", DEFAULT_START_DATE),
        params.get("split_date", DEFAULT_SPLIT_DATE),
        params.get("pd1_name", DEFAULT_PD1_NAME),
        params.get("pd2_name", DEFAULT_PD2_NAME),
    )

    run_dir = os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))

    try:
        # Imported lazily — the legacy module is only needed by this endpoint.
        from analysis import run_analysis

        docx_path = run_analysis(
            EVENTS_FILE, start_date, split_date, pd1_name, pd2_name, run_dir
        )
        return send_file(
            docx_path,
            as_attachment=True,
            download_name="SHEQ_PD_Comparison.docx",
        )
    except Exception as e:
        log.exception("Legacy report generation failed")
        return jsonify({"success": False, "error": str(e)}), 500
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Entry point
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
if __name__ == "__main__":
    # Ensure the report output directory exists before any request arrives.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    log.info("SHEQ Analysis Tool starting on http://localhost:5000")
    log.info(" Events: %s", EVENTS_FILE)
    log.info(" Safety Energy: %s", SAFETY_ENERGY_FILE)
    log.info(" LLC Data: %s", LLC_FILE)
    # debug=True enables Flask's reloader/debugger — development use only.
    app.run(debug=True, port=5000)
|