FrAnKu34t23 committed on
Commit
23f27a5
·
verified ·
1 Parent(s): 74806e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -235
app.py CHANGED
@@ -2,28 +2,28 @@ import gradio as gr
2
  import torch
3
  import re
4
  import traceback
5
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
6
- from peft import PeftModel
7
- import ast
8
  import json
9
  import warnings
10
  warnings.filterwarnings("ignore")
11
  import os
12
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
 
 
13
 
14
  # Configuration
 
 
 
15
  BASE_MODEL_ID = "distilgpt2"
16
- LORA_MODEL_PATH = "FrAnKu34t23/Construction_Risk_Prediction_Model_v2"
17
 
18
- model = None
19
- tokenizer = None
20
 
21
- # Load once at startup
22
  injury_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
23
 
24
  def classify_injury_zero_shot(description):
25
  candidate_labels = [
26
- "Low severity injury (minor discomfort or bruise) or unrelevan cases",
27
  "Medium severity injury (sprain, strain, moderate pain)",
28
  "High severity injury (fracture, major trauma, amputation, fatal)"
29
  ]
@@ -32,28 +32,24 @@ def classify_injury_zero_shot(description):
32
  candidate_labels[1]: "Medium",
33
  candidate_labels[2]: "High"
34
  }
35
-
36
  result = injury_classifier(description, candidate_labels)
37
- top_label = result["labels"][0]
38
-
39
- for label, score in zip(result['labels'], result['scores']):
40
- print(f"{label}: {score:.2f}")
41
-
42
- return label_mapping[top_label]
43
-
44
- def load_model():
45
- global model, tokenizer
46
  try:
47
- print("πŸ”„ Loading base model and tokenizer...")
48
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
49
- tokenizer.pad_token = tokenizer.eos_token
50
- base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
51
- model = PeftModel.from_pretrained(base_model, LORA_MODEL_PATH)
52
- model.eval()
53
- print("βœ… Model loaded successfully!")
 
 
54
  return True
55
  except Exception as e:
56
- print(f"❌ Error loading model: {e}")
57
  return False
58
 
59
  def format_input(scenario_text):
@@ -62,113 +58,17 @@ def format_input(scenario_text):
62
  scenario = ", " + scenario.lstrip(", ")
63
  return f"Based on the situation, predict potential hazards and injuries. {scenario}<|endoftext|>"
64
 
65
- def clean_raw_json_string(raw_text):
66
- """Clean malformed quotes and characters from model output before parsing."""
67
- # Normalize bad quotes
68
- cleaned = raw_text.replace("β€˜", "'").replace("’", "'")
69
- cleaned = cleaned.replace("β€œ", '"').replace("”", '"')
70
- cleaned = cleaned.replace("''", '"').replace("``", '"').replace("†", "")
71
-
72
- # Fix common errors: smart quotes, double single quotes, etc.
73
- cleaned = re.sub(r'([{\[,])\s*"', r'\1 "', cleaned)
74
- cleaned = re.sub(r'"\s*([}\],])', r'" \1', cleaned)
75
-
76
- return cleaned
77
-
78
- def extract_json_object(text):
79
- """Extract and parse the first valid JSON object from text, including malformed hazard list recovery."""
80
- pattern = r'\{(?:[^{}]|"[^"]*")*\}'
81
- matches = re.findall(pattern, text, re.DOTALL)
82
-
83
- for match in matches:
84
- try:
85
- cleaned = clean_raw_json_string(match)
86
-
87
- # Detect and collect any ["..."] list fragments (typically malformed hazards)
88
- hazard_items = re.findall(r'\["([^"]+)"\]', cleaned)
89
-
90
- # Remove malformed hazard list fragments like: ["Hazards"], ["Chemicals"]
91
- cleaned = re.sub(r'(\["[^"]+"\]\s*,?\s*)+', '', cleaned)
92
-
93
- # If Hazards key is missing and we collected items, add it
94
- if hazard_items and "Hazards" not in cleaned:
95
- cleaned = cleaned.rstrip('} \n\t,')
96
- cleaned += ', "Hazards": ' + json.dumps(hazard_items) + '}'
97
-
98
- # Attempt to parse
99
- parsed = json.loads(cleaned)
100
- if isinstance(parsed, dict):
101
- return parsed
102
- except Exception as e:
103
- print(f"⚠️ extract_json_object failed: {e}")
104
- continue
105
- return None
106
-
107
- def extract_fields(text):
108
- def clean_text(t):
109
- t = t.replace("β€˜", "'").replace("’", "'").replace("β€œ", '"').replace("”", '"')
110
- t = t.replace("''", '"').replace("``", '"').replace("†", "").replace("Β΄", "")
111
- t = re.sub(r"[^\x00-\x7F]+", "", t)
112
- return t
113
-
114
- cleaned = clean_text(text)
115
-
116
- cause = "Unknown"
117
- injury = "Unknown"
118
- hazards = []
119
-
120
- # Extract cause
121
- match = re.search(r'"?Cause of Accident"?\s*:\s*"([^"]+)"', cleaned, re.IGNORECASE)
122
- if match:
123
- cause = match.group(1).strip()
124
-
125
- # Use zero-shot classifier always for injury
126
- try:
127
- injury = classify_injury_zero_shot(cleaned)
128
- except:
129
- injury = "Unknown"
130
-
131
- # Extract Hazards
132
- match = re.search(r'"?Hazards"?\s*:\s*(\[[^\]]+\])', cleaned, re.IGNORECASE)
133
- if match:
134
- try:
135
- hazards_raw = clean_text(match.group(1))
136
- if not hazards_raw.strip().startswith("["):
137
- raise ValueError("Not a list")
138
- hazards = ast.literal_eval(hazards_raw)
139
- hazards = [str(h).strip().strip('"').strip("'") for h in hazards]
140
- except Exception as e:
141
- print("⚠️ Hazard parsing failed:", e)
142
- hazards = []
143
-
144
- structured = {
145
- "Hazards": hazards,
146
- "Cause of Accident": cause,
147
- "Degree of Injury": injury
148
- }
149
-
150
- return hazards, cause, injury, json.dumps(structured, indent=2)
151
-
152
- def generate_prediction(scenario_text, max_length=300, temperature=0.7):
153
- global model, tokenizer
154
- if model is None or tokenizer is None:
155
- return "❌ Model not loaded.", "", "", "", ""
156
-
157
- if not scenario_text.strip():
158
- return "❌ Please enter a scenario.", "", "", "", ""
159
-
160
- try:
161
- prompt = format_input(scenario_text)
162
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
163
- device = next(model.parameters()).device
164
- inputs = {k: v.to(device) for k, v in inputs.items()}
165
 
166
  with torch.no_grad():
167
  output = model.generate(
168
  **inputs,
169
  max_length=inputs["input_ids"].shape[1] + max_length,
170
  temperature=temperature,
171
- do_sample=True,
172
  top_p=0.9,
173
  top_k=50,
174
  repetition_penalty=1.1,
@@ -176,130 +76,107 @@ def generate_prediction(scenario_text, max_length=300, temperature=0.7):
176
  eos_token_id=tokenizer.eos_token_id
177
  )
178
 
179
- full_output = tokenizer.decode(output[0], skip_special_tokens=True)
180
- index = full_output.rfind("Based on the situation")
181
- generated = full_output[index:].strip() if index != -1 else full_output.strip()
182
-
183
- json_obj = extract_json_object(generated)
184
- if json_obj:
185
- cause = json_obj.get("Cause of Accident", "Unknown")
186
- injury = json_obj.get("Degree of Injury", "Unknown")
187
- hazards = json_obj.get("Hazards", [])
188
- structured_json = json.dumps(json_obj, indent=2)
189
- else:
190
- hazards, cause, injury, structured_json = extract_fields(generated)
191
-
192
- hazards_display = ", ".join(hazards) if isinstance(hazards, list) else str(hazards)
193
- return hazards_display, cause, injury, structured_json, f"=== RAW RESPONSE START ===\n{generated}\n=== RAW RESPONSE END ==="
194
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  except Exception as e:
196
- return f"❌ Error during prediction: {str(e)}", "", "", "", traceback.format_exc()
197
 
198
  def create_interface():
199
- css = """
200
- .gradio-container {
201
- font-family: 'Arial', sans-serif;
202
- }
203
- .header {
204
- text-align: center;
205
- margin-bottom: 30px;
206
- }
207
- .warning-box {
208
- background-color: #fff3cd;
209
- border: 1px solid #ffeaa7;
210
- border-radius: 5px;
211
- padding: 15px;
212
- margin: 10px 0;
213
- }
214
- .error-box {
215
- background-color: #f8d7da;
216
- border: 1px solid #f5c6cb;
217
- border-radius: 5px;
218
- padding: 15px;
219
- margin: 10px 0;
220
- color: #721c24;
221
- }
222
- """
223
-
224
- with gr.Blocks(css=css, title="Workplace Safety Risk Predictor") as interface:
225
  gr.HTML("""
226
- <div class="header">
227
- <h1>🚧 Workplace Safety Risk Prediction Model</h1>
228
- <p>Analyze workplace scenarios to identify potential hazards, causes, and injury severity</p>
229
- </div>
230
  """)
231
 
232
  with gr.Row():
233
- with gr.Column(scale=2):
234
- scenario_input = gr.Textbox(
235
- lines=5,
236
- placeholder="e.g. During welding, flammable gas ignited, causing explosion...",
237
- label="Workplace Incident Description"
238
- )
239
- temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Creativity (Temperature)")
240
- max_length = gr.Slider(100, 500, value=300, step=50, label="Max Response Length")
241
- predict_btn = gr.Button("πŸ” Analyze Scenario", variant="primary")
242
-
243
- gr.HTML("""
244
- <div class="warning-box">
245
- <strong>⚠️ Note:</strong> This tool is experimental. Consult safety experts for actual workplace assessments.
246
- </div>
247
- """)
248
-
249
- with gr.Column(scale=2):
250
- hazards_output = gr.Textbox(label="🚨 Identified Hazards")
251
- cause_output = gr.Textbox(label="πŸ” Cause of Accident")
252
  degree_output = gr.Textbox(label="πŸ“ˆ Degree of Injury")
253
-
254
- with gr.Accordion("πŸ“‹ Structured Output", open=False):
255
- json_output = gr.Code(label="Extracted Info", language="json")
256
-
257
- with gr.Accordion("πŸ” Raw Model Output", open=False):
258
- raw_output = gr.Textbox(label="Raw Text", lines=5)
259
-
260
- # Example Buttons
261
- gr.HTML("<h3>πŸ’‘ Example Scenarios</h3>")
262
- with gr.Row():
263
- example1 = gr.Button("Power Press Accident")
264
- example2 = gr.Button("Fall from Ladder")
265
- example3 = gr.Button("Chemical Exposure")
266
- example4 = gr.Button("Lifting Injury")
267
 
268
  predict_btn.click(
269
- fn=generate_prediction,
270
- inputs=[scenario_input, max_length, temperature],
271
- outputs=[hazards_output, cause_output, degree_output, json_output, raw_output]
272
  )
273
 
274
- example1.click(
275
- lambda: "An employee was operating a 400 ton mechanical power press. The press was actuated while the employee's right hand was in the point of operation. The employee's fingers were amputated.",
276
- outputs=scenario_input
277
- )
278
- example2.click(
279
- lambda: "An employee was using a ladder to access high shelves. The ladder was not properly secured and the employee fell from a height of 8 feet, resulting in head injuries.",
280
- outputs=scenario_input
281
- )
282
- example3.click(
283
- lambda: "An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.",
284
- outputs=scenario_input
285
- )
286
- example4.click(
287
- lambda: "An employee was manually lifting heavy boxes weighing over 50 pounds without proper lifting technique or mechanical aids. The employee strained their back.",
288
- outputs=scenario_input
289
- )
290
-
291
- gr.HTML("""
292
- <div style="text-align: center; margin-top: 30px; color: #666;">
293
- <p>Built with ❀️ using Hugging Face Transformers and Gradio</p>
294
- </div>
295
- """)
296
 
297
  return interface
298
 
299
- print("πŸš€ Launching App...")
300
- if load_model():
301
  app = create_interface()
302
  if __name__ == "__main__":
303
  app.launch(server_name="0.0.0.0", server_port=7860, share=True)
304
  else:
305
- print("❌ Could not load model.")
 
2
  import torch
3
  import re
4
  import traceback
 
 
 
5
  import json
6
  import warnings
7
  warnings.filterwarnings("ignore")
8
  import os
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
10
+ from peft import PeftModel
11
+ from google import genai
12
 
13
  # Configuration
14
+ MODEL_PATHS = [
15
+ "FrAnKu34t23/Construction_Risk_Prediction_Model_v3"
16
+ ]
17
  BASE_MODEL_ID = "distilgpt2"
 
18
 
19
+ models = []
20
+ tokenizers = []
21
 
 
22
  injury_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
23
 
24
  def classify_injury_zero_shot(description):
25
  candidate_labels = [
26
+ "Low severity injury (minor discomfort or bruise) or unrelevant cases",
27
  "Medium severity injury (sprain, strain, moderate pain)",
28
  "High severity injury (fracture, major trauma, amputation, fatal)"
29
  ]
 
32
  candidate_labels[1]: "Medium",
33
  candidate_labels[2]: "High"
34
  }
 
35
  result = injury_classifier(description, candidate_labels)
36
+ return label_mapping[result['labels'][0]]
37
+
38
+ def load_models():
39
+ global models, tokenizers
 
 
 
 
 
40
  try:
41
+ for path in MODEL_PATHS:
42
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
43
+ tokenizer.pad_token = tokenizer.eos_token
44
+ base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
45
+ model = PeftModel.from_pretrained(base_model, path)
46
+ model.eval()
47
+ models.append(model)
48
+ tokenizers.append(tokenizer)
49
+ print("βœ… All models loaded.")
50
  return True
51
  except Exception as e:
52
+ print(f"❌ Model loading failed: {e}")
53
  return False
54
 
55
  def format_input(scenario_text):
 
58
  scenario = ", " + scenario.lstrip(", ")
59
  return f"Based on the situation, predict potential hazards and injuries. {scenario}<|endoftext|>"
60
 
61
+ def generate_all_model_outputs(prompt, max_length=300, temperature=0.7):
62
+ outputs = []
63
+ for model, tokenizer in zip(models, tokenizers):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
65
+ inputs = {k: v.to(next(model.parameters()).device) for k, v in inputs.items()}
 
66
 
67
  with torch.no_grad():
68
  output = model.generate(
69
  **inputs,
70
  max_length=inputs["input_ids"].shape[1] + max_length,
71
  temperature=temperature,
 
72
  top_p=0.9,
73
  top_k=50,
74
  repetition_penalty=1.1,
 
76
  eos_token_id=tokenizer.eos_token_id
77
  )
78
 
79
+ decoded = tokenizer.decode(output[0], skip_special_tokens=True)
80
+ outputs.append(f"=== RAW RESPONSE START ===\n{decoded}\n=== RAW RESPONSE END ===")
81
+ return outputs
82
+
83
+ def extract_scenario_from_prompt(prompt):
84
+ try:
85
+ return re.sub(r"^.*predict potential hazards and injuries\.\s*", "", prompt)
86
+ except:
87
+ return prompt
88
+
89
+ def call_gemini_pro(raw_outputs, zero_shot_injury):
90
+ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
91
+ chat = client.chats.create(model="gemini-2.0-flash")
92
+
93
+ prompt = f"""
94
+ You are a workplace safety analyst. Below are raw text outputs from three different AI models analyzing the same construction scenario.
95
+
96
+ Your tasks:
97
+ - Compare and merge the model outputs.
98
+ - Summarize the most plausible cause of accident in natural language.
99
+ - Infer the degree of injury by considering all outputs and a classifier suggestion.
100
+
101
+ Classifier prediction for Degree of Injury: {zero_shot_injury}
102
+
103
+ Model Outputs:
104
+ {raw_outputs[0]}
105
+
106
+ {raw_outputs[1]}
107
+
108
+ {raw_outputs[2]}
109
+
110
+ Respond in this format:
111
+ Cause of Accident: <sentence>
112
+ Degree of Injury: <Low / Medium / High>
113
+ """
114
+ try:
115
+ response = chat.send_message(prompt)
116
+ return response.text.strip()
117
+ except Exception as e:
118
+ print("❌ Gemini Pro API call failed:", e)
119
+ return "Cause of Accident: Unknown\nDegree of Injury: Unknown"
120
+
121
+ def generate_prediction_ensemble(scenario_text, max_length=300, temperature=0.7):
122
+ if not scenario_text.strip():
123
+ return "❌ Please enter a scenario.", "", ""
124
+
125
+ try:
126
+ prompt = format_input(scenario_text)
127
+ raw_outputs = generate_all_model_outputs(prompt, max_length, temperature)
128
+
129
+ scenario_only = extract_scenario_from_prompt(prompt)
130
+ injury_guess = classify_injury_zero_shot(scenario_only)
131
+
132
+ gemini_response = call_gemini_pro(raw_outputs, injury_guess)
133
+
134
+ match_cause = re.search(r"Cause of Accident\s*:\s*(.+)", gemini_response)
135
+ match_injury = re.search(r"Degree of Injury\s*:\s*(Low|Medium|High)", gemini_response, re.IGNORECASE)
136
+
137
+ cause = match_cause.group(1).strip() if match_cause else "Unknown"
138
+ injury = match_injury.group(1).strip().capitalize() if match_injury else injury_guess
139
+
140
+ combined_raw = "\n\n".join(raw_outputs)
141
+ return cause, injury, combined_raw
142
+
143
  except Exception as e:
144
+ return "❌ Prediction failed.", "", traceback.format_exc()
145
 
146
  def create_interface():
147
+ with gr.Blocks(title="Workplace Safety Risk Predictor") as interface:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  gr.HTML("""
149
+ <h1>🚧 Workplace Safety Risk Prediction Model (Ensemble)</h1>
150
+ <p>Enter a construction scenario to analyze possible risks.</p>
 
 
151
  """)
152
 
153
  with gr.Row():
154
+ with gr.Column():
155
+ scenario_input = gr.Textbox(lines=5, label="Scenario Description")
156
+ temperature = gr.Slider(0.1, 1.0, 0.7, 0.1, label="Creativity (Temperature)")
157
+ max_len = gr.Slider(100, 500, 300, 50, label="Max Response Length")
158
+ predict_btn = gr.Button("πŸ” Analyze")
159
+
160
+ with gr.Column():
161
+ cause_output = gr.Textbox(label="πŸ“ Cause of Accident")
 
 
 
 
 
 
 
 
 
 
 
162
  degree_output = gr.Textbox(label="πŸ“ˆ Degree of Injury")
163
+ with gr.Accordion("πŸ“„ Raw Model Outputs", open=False):
164
+ raw_output = gr.Textbox(label="Raw Responses", lines=12)
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  predict_btn.click(
167
+ fn=generate_prediction_ensemble,
168
+ inputs=[scenario_input, max_len, temperature],
169
+ outputs=[cause_output, degree_output, raw_output]
170
  )
171
 
172
+ gr.HTML("""<p style='text-align:center;'>Built with πŸ€– Transformers + Gemini Flash + Gradio</p>""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  return interface
175
 
176
+ print("πŸš€ Starting app...")
177
+ if load_models():
178
  app = create_interface()
179
  if __name__ == "__main__":
180
  app.launch(server_name="0.0.0.0", server_port=7860, share=True)
181
  else:
182
+ print("❌ Failed to load models.")