""" app.py — SHEQ Analysis Tool — Flask web application. Run: python app.py Then open http://localhost:5000 The sidebar has two sections: 1. Events Explorer — filter and chart Events data interactively. 2. Generate Report — run the full analysis across Events, Safety Energy, and LLC Data and download a comprehensive DOCX report. """ from __future__ import annotations import logging import os from datetime import datetime import pandas as pd from flask import Flask, jsonify, render_template, request, send_file from config import ( EVENTS_FILE, LLC_FILE, SAFETY_ENERGY_FILE, DEFAULT_PD1_NAME, DEFAULT_PD2_NAME, DEFAULT_START_DATE, DEFAULT_SPLIT_DATE, OUTPUT_DIR, ) from data_loader import load_all, get_body_parts, load_and_prepare from analysis_engine import run_full_analysis from report_builder import build_report from ppt_builder import build_presentation # ── Logging ────────────────────────────────────────────────────────────────── logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-8s %(name)s %(message)s", datefmt="%H:%M:%S", ) log = logging.getLogger("app") app = Flask(__name__) # ── Cached raw DataFrames (loaded on first request) ────────────────────────── _CACHE: dict[str, pd.DataFrame | None] = { "events": None, } def _get_events_df() -> pd.DataFrame: """Return the raw Events DataFrame, loading from disk on first call.""" if _CACHE["events"] is None: log.info("Loading Events from %s", EVENTS_FILE) df = pd.read_excel(EVENTS_FILE) # Normalise date column — handle "Monday, 25 March 2024" and ISO formats date_col = "EventDate" if "EventDate" in df.columns else "Event Date" df["_date"] = df[date_col].apply(_parse_one_date) _CACHE["events"] = df return _CACHE["events"].copy() def _parse_one_date(val) -> pd.Timestamp: if pd.isna(val): return pd.NaT s = str(val).strip() if "," in s and len(s.split(",")[0].split()) == 1: s = s.split(",", 1)[1].strip() try: return pd.to_datetime(s, dayfirst=True) except Exception: return pd.NaT # 
# ─────────────────────────────────────────────────────────────────────────────
# Web UI
# ─────────────────────────────────────────────────────────────────────────────


@app.route("/")
def index():
    """Render the Events Explorer landing page with its filter options."""
    df = _get_events_df()
    min_date = df["_date"].min().strftime("%Y-%m-%d")
    max_date = df["_date"].max().strftime("%Y-%m-%d")

    # Handle both column name variants
    evt_col = "EventType" if "EventType" in df.columns else "Event Type"
    cons_col = "Actual Consequence"

    event_types = sorted(df[evt_col].dropna().unique().tolist())
    consequences = sorted(df[cons_col].dropna().unique().tolist())

    return render_template(
        "index.html",
        min_date=min_date,
        max_date=max_date,
        event_types=event_types,
        consequences=consequences,
        total_events=len(df),
    )


# ─────────────────────────────────────────────────────────────────────────────
# Events Explorer API
# ─────────────────────────────────────────────────────────────────────────────


@app.route("/api/filter", methods=["POST"])
def api_filter():
    """Return filtered summary stats as JSON for the Events Explorer.

    JSON body keys (all optional): ``start_date``, ``end_date``,
    ``event_types``, ``consequences``.  Each filter is applied only when
    present and non-empty.  The response aggregates counts by event type,
    consequence, injury classification, day-of-week, month, body part,
    root cause and CRP, plus investigation-performance medians.
    """
    # get_json(silent=True) tolerates a missing/incorrect Content-Type header
    # instead of aborting with 415 before we can answer.
    params = request.get_json(silent=True) or {}
    df = _get_events_df()

    evt_col = "EventType" if "EventType" in df.columns else "Event Type"
    cons_col = "Actual Consequence"
    crp_col = "CRP Involved" if "CRP Involved" in df.columns else "CRPInvolved"
    rc_col = "Root Cause Category"
    inj_col = "Ventia Injury Classification"
    bp_col = "Bodily Location"

    # Filters
    if params.get("start_date"):
        df = df[df["_date"] >= pd.Timestamp(params["start_date"])]
    if params.get("end_date"):
        df = df[df["_date"] <= pd.Timestamp(params["end_date"])]
    if params.get("event_types"):
        df = df[df[evt_col].isin(params["event_types"])]
    if params.get("consequences"):
        df = df[df[cons_col].isin(params["consequences"])]

    if len(df) == 0:
        return jsonify({"error": "No events match the selected filters.", "total": 0})

    # Summary stats
    evt_counts = df[evt_col].value_counts().to_dict()
    cons_counts = df[cons_col].value_counts().to_dict()
    inj_class = (
        df[inj_col].value_counts().to_dict() if inj_col in df.columns else {}
    )

    dow_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]
    dow = (
        df["_date"].dt.day_name().value_counts()
        .reindex(dow_order, fill_value=0).to_dict()
    )

    monthly = df.groupby(df["_date"].dt.to_period("M")).size()
    monthly_data = {str(k): int(v) for k, v in monthly.items()}

    bp = (
        get_body_parts(df[bp_col]).value_counts().head(10).to_dict()
        if bp_col in df.columns else {}
    )
    rc = (
        df[rc_col].value_counts().to_dict() if rc_col in df.columns else {}
    )

    crp: dict = {}
    if crp_col in df.columns:
        crp = df[crp_col].value_counts().to_dict()
        # These two labels are placeholders, not real CRP categories.
        crp.pop("None Identified", None)
        crp.pop("Under Investigation", None)

    # Investigation performance — use whichever lag column this export has.
    lag_col = next((c for c in ("Days to Investigate", "Event Lag", "Days to Enter")
                    if c in df.columns), None)
    close_col = "Days to Close" if "Days to Close" in df.columns else None
    inv_med = df[lag_col].dropna().median() if lag_col else None
    close_med = df[close_col].dropna().median() if close_col else None

    return jsonify({
        "total": len(df),
        "date_range": (
            f"{df['_date'].min().strftime('%d %b %Y')} "
            f"\u2013 {df['_date'].max().strftime('%d %b %Y')}"
        ),
        "event_types": evt_counts,
        "consequences": cons_counts,
        "injury_classification": inj_class,
        "day_of_week": dow,
        "monthly": monthly_data,
        "body_parts": bp,
        "root_causes": rc,
        "crp": crp,
        # BUG FIX: a legitimate median of 0 days is falsy, so the previous
        # `if inv_med and ...` truthiness test wrongly reported None for 0.0.
        # Test for None / NaN explicitly instead.
        "median_investigate_days": (
            round(inv_med, 1)
            if inv_med is not None and pd.notna(inv_med) else None
        ),
        "median_close_days": (
            round(close_med, 1)
            if close_med is not None and pd.notna(close_med) else None
        ),
        "closed_pct": round(
            (df["Status"] == "Closed").sum() / len(df) * 100, 1
        ) if "Status" in df.columns else None,
    })


# ─────────────────────────────────────────────────────────────────────────────
# Comprehensive Report API
# ─────────────────────────────────────────────────────────────────────────────


@app.route("/api/generate_full_report", methods=["POST"])
def api_generate_full_report():
    """
    Load all three data sources, run the full analysis pipeline, build the
    DOCX report, and return it as a file download.

    JSON body keys (all optional): ``start_date``, ``export_format``
    ("docx" or "pptx"), ``events_file``, ``safety_energy_file``,
    ``llc_file``.  Returns 400 on an unsupported format, 404 when an input
    file is missing, 500 on any other failure.
    """
    params = request.get_json(silent=True) or {}
    start_date = params.get("start_date", DEFAULT_START_DATE)
    export_format = str(params.get("export_format", "docx")).lower()
    if export_format not in {"docx", "pptx"}:
        return jsonify({"success": False,
                        "error": "Supported export formats are DOCX and PPTX."}), 400

    events_path = params.get("events_file", EVENTS_FILE)
    se_path = params.get("safety_energy_file", SAFETY_ENERGY_FILE)
    llc_path = params.get("llc_file", LLC_FILE)

    # Each run writes into its own timestamped directory so concurrent or
    # repeated runs never clobber each other's chart/report artefacts.
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = os.path.join(OUTPUT_DIR, run_id)

    try:
        log.info("Starting full report generation (run_id=%s)", run_id)
        data = load_all(events_path, se_path, llc_path)

        results = run_full_analysis(
            events=data["events"],
            safety_energy=data["safety_energy"],
            llc=data["llc"],
            start_date=start_date,
            split_date=DEFAULT_SPLIT_DATE,
            pd1_name=DEFAULT_PD1_NAME,
            pd2_name=DEFAULT_PD2_NAME,
            output_dir=run_dir,
        )

        if export_format == "pptx":
            report_path = build_presentation(results, run_dir)
            download_name = f"SHEQ_Safety_Performance_{run_id}.pptx"
            mimetype = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
        else:
            report_path = build_report(results, run_dir)
            download_name = f"SHEQ_Safety_Performance_{run_id}.docx"
            mimetype = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

        log.info("Report ready: %s", report_path)
        return send_file(
            report_path,
            as_attachment=True,
            download_name=download_name,
            mimetype=mimetype,
        )

    except FileNotFoundError as e:
        log.error("File not found: %s", e)
        return jsonify({"success": False, "error": str(e)}), 404
    except Exception as e:
        log.exception("Report generation failed")
        return jsonify({"success": False, "error": str(e)}), 500


# ── Legacy endpoint (kept for backwards compatibility) ───────────────────────


@app.route("/api/download_report", methods=["POST"])
def api_download_report():
    """Legacy Events-only PD comparison report (preserved from v1)."""
    params = request.get_json(silent=True) or {}
    start_date = params.get("start_date", DEFAULT_START_DATE)
    split_date = params.get("split_date", DEFAULT_SPLIT_DATE)
    pd1_name = params.get("pd1_name", DEFAULT_PD1_NAME)
    pd2_name = params.get("pd2_name", DEFAULT_PD2_NAME)
    run_dir = os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))

    try:
        # Imported lazily so the legacy module is only required when this
        # endpoint is actually hit.
        from analysis import run_analysis

        docx_path = run_analysis(
            EVENTS_FILE, start_date, split_date, pd1_name, pd2_name, run_dir
        )
        return send_file(
            docx_path,
            as_attachment=True,
            download_name="SHEQ_PD_Comparison.docx",
        )
    except Exception as e:
        log.exception("Legacy report generation failed")
        return jsonify({"success": False, "error": str(e)}), 500


# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    log.info("SHEQ Analysis Tool starting on http://localhost:5000")
    log.info(" Events: %s", EVENTS_FILE)
    log.info(" Safety Energy: %s", SAFETY_ENERGY_FILE)
    log.info(" LLC Data: %s", LLC_FILE)
    # NOTE(review): Flask's built-in dev server with debug=True — fine for
    # local analysis use, not for exposed/production deployment.
    app.run(debug=True, port=5000)