""" config.py — Central configuration for the SHEQ Analysis Tool. Holds file paths, column name mappings, activity type definitions, severity orders, and brand constants. Edit this file when source column names change; do not touch the analysis or report modules. """ from __future__ import annotations import os # ── Default file paths (resolved relative to this file's directory) ────────── BASE_DIR = os.path.dirname(os.path.abspath(__file__)) EVENTS_FILE = os.environ.get("SHEQ_EVENTS_FILE", os.path.join(BASE_DIR, "Events.xlsx")) LLC_FILE = os.environ.get("SHEQ_LLC_FILE", os.path.join(BASE_DIR, "LLC_Data.xlsx")) SAFETY_ENERGY_FILE = os.environ.get("SHEQ_SE_FILE", os.path.join(BASE_DIR, "Safety_Energy.xlsx")) OUTPUT_DIR = os.environ.get("SHEQ_OUTPUT_DIR", os.path.join(BASE_DIR, "output")) # ── Events.xlsx column mapping ───────────────────────────────────────────────── # Maps a normalised internal name → list of candidate column names in order # of preference. data_loader picks the first match it finds. 
# Keys are the normalised internal names; each value lists the candidate
# source-column headers, most preferred first.
EVENTS_COL_MAP: dict[str, list[str]] = {
    "date": ["EventDate", "Event Date", "Date"],
    "event_type": ["EventType", "Event Type"],
    "consequence": ["Actual Consequence"],
    "potential": ["Potential Consequence"],
    "status": ["Status"],
    "business_unit": ["Business Unit"],
    "project": ["Project"],
    "location": ["Location", "Location.1"],
    "crp": ["CRP Involved", "CRPInvolved"],
    "root_cause_cat": ["Root Cause Category"],
    "root_cause_sub": ["Root Cause Sub-Category"],
    "injury_class": ["Ventia Injury Classification"],
    "body_part": ["Bodily Location"],
    "brief_desc": ["Brief Description"],
    "event_desc": ["Event Description"],
    "days_to_enter": ["Days to Enter"],
    "event_lag": ["Event Lag"],
    "report_lag": ["Report Lag"],
    "investigation_done": ["Investigation Completed"],
    "hipo": ["HiPo"],
    "critical_event": ["Critical Event"],
}

# ── Safety_Energy.xlsx column mapping ─────────────────────────────────────────
# Same layout as EVENTS_COL_MAP: internal name → candidate headers.
SE_COL_MAP: dict[str, list[str]] = {
    "date": ["EventDate", "Date Conducted", "CompletedDate"],
    "module_name": ["ModuleName"],
    "module_prefix": ["ModulePrefix"],
    "module_type": ["ModuleType"],
    "leader": ["CompletedByName", "Conducted By"],
    "business_unit": ["Business Unit"],
    "project": ["Project"],
    "location": ["Location", "Specific Location"],
    "shift": ["Shift"],
    "at_risk_aspects": ["At Risk Aspects"],
    "total_questions": ["Total Questions"],
    "actions": ["Actions"],
    "atl_actions": ["ATL Actions"],
    "at_risk_crp": ["At risk CRP"],
    "llc_topic": ["LLC Topic"],
    "at_risk_obs": ["At risk situation/observation"],
    "positive_obs": ["Positive Observation"],
    "find_fix": ["Find & Fix", "Find&Fix"],
    "participants": ["Number of people spoken to", "Participants"],
    "time_spent": ["Time Spent on LLC"],
}

# ── LLC_Data.xlsx column mapping ───────────────────────────────────────────────
# Same layout as EVENTS_COL_MAP: internal name → candidate headers.
LLC_COL_MAP: dict[str, list[str]] = {
    "date": ["EventDate", "Date Conducted", "Date"],
    "topic": ["LLC Topic"],
    "leader": ["Conducted by"],
    "business_unit": ["Business Unit"],
    "project": ["Project"],
    "location": ["Location", "Specific Location"],
    "crp_focus": ["CRP in Focus"],
    "at_risk_obs": ["At risk situation/observation"],
    "positive_obs": ["Positive Observation"],
    "at_risk_flag": ["At risk work practices observed"],
    "participants": ["Participants"],
    "find_fix": ["Find&Fix", "Find & Fix"],
    "review_action": ["Review & Action"],
    "shift": ["Shift"],
}

# ── Activity type normalisation ────────────────────────────────────────────────
# Safety_Energy ModuleType values → display label
MODULE_TYPE_LABELS: dict[str, str] = {
    "Leader Learning Conversation": "LLC",
    "Critical Control Check": "CCC",
    "Operational Control Check": "OCC",
}

# Canonical leading-activity types used throughout the report
LEADING_ACTIVITY_TYPES: list[str] = ["LLC", "CCC", "OCC"]

# NOTE on duplicate "OCC" label:
#   In some legacy notes and older exports the label "OCC" appeared for items
#   that are now split into "CCC" (Critical Control Check) and "OCC"
#   (Operational Control Check). In the current Safety_Energy export both
#   CCC and OCC are already correctly separated via ModuleType. The LLC_Data
#   export contains only LLC-type records. No manual deduplication is
#   required; however we collapse all three under "Safety Energy" when
#   computing the combined domain total.
# ── Consequence severity ordering (low → high) ────────────────────────────────
CONSEQUENCE_ORDER: list[str] = [
    "Negligible",
    "Minor",
    "Moderate",
    "Major",
    "Substantial",
]
# The three highest levels of CONSEQUENCE_ORDER.
CONSEQUENCE_SERIOUS: set[str] = {"Moderate", "Major", "Substantial"}

# ── Brand colours (hex) per DESIGN.md ─────────────────────────────────────────
DEEP_BLUE = "#0b3254"
SKY_BLUE = "#13b5ea"
DARK_GREEN = "#006e47"
MID_GREEN = "#009946"
LIGHT_GREEN = "#7bc143"
PURPLE = "#96358d"
AMBER = "#d97706"
RED = "#dc2626"
MUTED = "#64748b"
CARD_BG = "#f0f5fa"
PAGE_BG = "#f8fafc"
BORDER = "#e2e8f0"

CHART_PALETTE: list[str] = [
    DEEP_BLUE,
    SKY_BLUE,
    DARK_GREEN,
    MID_GREEN,
    LIGHT_GREEN,
    PURPLE,
    AMBER,
    RED,
]

# Activity type → colour mapping for charts
ACTIVITY_COLOURS: dict[str, str] = {
    "LLC": DEEP_BLUE,
    "CCC": SKY_BLUE,
    "OCC": DARK_GREEN,
}

# ── Report defaults ────────────────────────────────────────────────────────────
# Dates are ISO-formatted (YYYY-MM-DD) strings.
DEFAULT_START_DATE = "2024-01-01"
DEFAULT_SPLIT_DATE = "2025-04-01"
DEFAULT_PD1_NAME = "Matthew Arthur"
DEFAULT_PD2_NAME = "Manga"

# Minimum activity count for a leader to be included in focus tables
LEADER_MIN_ACTIVITIES = 5

# Correlation: minimum month-count required before reporting a correlation
CORR_MIN_MONTHS = 4

# Rolling window used for deeper Safety Energy trend analysis
TWO_YEAR_WINDOW_MONTHS = 24

# Quality scoring bands for leading-activity records
# NOTE(review): presumably each value is the minimum score for its band —
# confirm against the scoring code.
QUALITY_SCORE_BANDS: dict[str, int] = {
    "high_value": 70,
    "meaningful": 55,
    "shallow": 35,
}

# Keyword groups for at-risk theme extraction from free-text fields.
# Keywords are unique within each theme; "fall" and "isolation" are each
# listed under two different themes.
AT_RISK_KEYWORDS: dict[str, list[str]] = {
    "Manual Handling": [
        "manual handling", "lifting", "carrying", "musculoskeletal", "msd",
    ],
    "Working at Height": [
        "height", "ladder", "scaffold", "fall", "elevated",
    ],
    "Traffic/MVA": [
        "vehicle", "traffic", "driving", "reversing", "motor", "mva",
        "collision",
    ],
    "Hazardous Energy": [
        "energy", "electrical", "isolation", "loto", "stored energy",
        "pressure",
    ],
    "Slips/Trips/Falls": [
        "slip", "trip", "fall", "housekeeping", "wet floor", "uneven",
    ],
    "PPE": [
        "ppe", "personal protective", "helmet", "harness", "gloves",
        "safety glasses",
    ],
    "Fatigue": [
        "fatigue", "tired", "hours", "shift length", "rest",
    ],
    "Communication": [
        "communication", "briefing", "toolbox", "handover", "instruction",
    ],
    # "supervision" is listed once only; a repeated entry adds no coverage and
    # would be double-counted by any per-keyword tally.
    # NOTE(review): if "supervisor" should also match, add it explicitly.
    "Supervision": [
        "supervision", "oversight", "leadership", "monitoring",
    ],
    "CRP Compliance": [
        "crp", "critical risk", "permit", "isolation", "confined space",
        "work at height",
    ],
}