Spaces:

FrAnKu34t23
/

ConstructionRiskPredict

Sleeping

App Files Files Community

FrAnKu34t23 committed on Jul 30, 2025

Commit

744970d

verified ·

1 Parent(s): 43a7a51

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -98

app.py CHANGED Viewed

@@ -9,19 +9,28 @@ import os
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from peft import PeftModel
-# Configuration
 MODEL_PATHS = [
     "FrAnKu34t23/Construction_Risk_Prediction_Model_v3"
 ]
-BASE_MODEL_ID = "distilgpt2"
 models = []
 tokenizers = []
-# Initialize pipelines for different tasks
 injury_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-text_generator = pipeline("text-generation", model="microsoft/DialoGPT-medium", max_length=512)
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def classify_injury_zero_shot(description):
     candidate_labels = [
@@ -54,41 +63,6 @@ def load_models():
         print(f"❌ Model loading failed: {e}")
         return False
-def parse_input(input_text):
-    """Parse input - can be plain text or JSON"""
-    try:
-        # Try to parse as JSON first
-        data = json.loads(input_text)
-        if isinstance(data, dict):
-            # Extract relevant fields from JSON
-            scenario = ""
-            if "scenario" in data:
-                scenario = data["scenario"]
-            elif "description" in data:
-                scenario = data["description"]
-            elif "text" in data:
-                scenario = data["text"]
-            else:
-                # If no obvious field, concatenate all string values
-                scenario = " ".join([str(v) for v in data.values() if isinstance(v, str)])
-            # Add additional context if available
-            context_fields = ["location", "equipment", "workers", "conditions", "environment"]
-            context = []
-            for field in context_fields:
-                if field in data and data[field]:
-                    context.append(f"{field}: {data[field]}")
-            if context:
-                scenario += " Additional context: " + ", ".join(context)
-            return scenario.strip(), data
-        else:
-            return str(data), {"raw_input": str(data)}
-    except json.JSONDecodeError:
-        # If not JSON, treat as plain text
-        return input_text.strip(), {"scenario": input_text.strip()}
 def format_input(scenario_text):
     scenario = scenario_text.strip()
     if not scenario.startswith(", "):
@@ -123,74 +97,134 @@ def extract_scenario_from_prompt(prompt):
     except:
         return prompt
-def analyze_with_hf_models(raw_outputs, zero_shot_injury):
-    """Replace Gemini with Hugging Face models for analysis"""
     try:
-        # Combine all raw outputs
-        combined_text = "\n".join([output.replace("=== RAW RESPONSE START ===", "").replace("=== RAW RESPONSE END ===", "") for output in raw_outputs])
-        # Use summarization to get key points
-        if len(combined_text) > 100:
-            try:
-                summary_result = summarizer(combined_text[:1024], max_length=150, min_length=50, do_sample=False)
-                summarized_text = summary_result[0]['summary_text']
-            except:
-                summarized_text = combined_text[:500]  # Fallback to truncation
-        else:
-            summarized_text = combined_text
-        # Generate cause analysis using text generation
-        cause_prompt = f"Analyze this workplace safety incident and identify the main cause: {summarized_text}. The primary cause of this accident was"
-        try:
-            cause_result = text_generator(cause_prompt, max_length=len(cause_prompt.split()) + 30, temperature=0.7, do_sample=True)
-            cause_text = cause_result[0]['generated_text']
-            # Extract the generated part after the prompt
-            cause = cause_text.replace(cause_prompt, "").strip()
-            if not cause:
-                cause = "Unable to determine specific cause from the analysis"
-        except:
-            cause = "Analysis indicates multiple contributing factors to the workplace incident"
-        # Use the zero-shot classification result for injury degree
-        injury_degree = zero_shot_injury
-        return f"Cause of Accident: {cause}\nDegree of Injury: {injury_degree}"
     except Exception as e:
-        print("❌ HF model analysis failed:", e)
-        return f"Cause of Accident: Analysis failed due to technical error\nDegree of Injury: {zero_shot_injury}"
-def generate_prediction_ensemble(input_text, max_length=300, temperature=0.7):
-    if not input_text.strip():
         return "❌ Please enter a scenario.", "", "", ""
     try:
-        # Parse input (JSON or plain text)
-        scenario_text, parsed_data = parse_input(input_text)
-        if not scenario_text:
-            return "❌ No valid scenario found in input.", "", "", json.dumps(parsed_data, indent=2)
         prompt = format_input(scenario_text)
         raw_outputs = generate_all_model_outputs(prompt, max_length, temperature)
         scenario_only = extract_scenario_from_prompt(prompt)
         injury_guess = classify_injury_zero_shot(scenario_only)
-        # Use HF models instead of Gemini
-        hf_response = analyze_with_hf_models(raw_outputs, injury_guess)
-        match_cause = re.search(r"Cause of Accident\s*:\s*(.+)", hf_response)
-        match_injury = re.search(r"Degree of Injury\s*:\s*(Low|Medium|High)", hf_response, re.IGNORECASE)
         cause = match_cause.group(1).strip() if match_cause else "Unable to determine cause"
         injury = match_injury.group(1).strip().capitalize() if match_injury else injury_guess
         combined_raw = "\n\n".join(raw_outputs)
-        parsed_json = json.dumps(parsed_data, indent=2)
-        return cause, injury, combined_raw, parsed_json
     except Exception as e:
         return "❌ Prediction failed.", "", traceback.format_exc(), ""
@@ -198,25 +232,27 @@ def generate_prediction_ensemble(input_text, max_length=300, temperature=0.7):
 def create_interface():
     with gr.Blocks(title="Workplace Safety Risk Predictor") as interface:
         gr.HTML("""
-        <h1>🚧 Workplace Safety Risk Prediction Model (Ensemble)</h1>
-        <p>Enter a construction scenario to analyze possible risks. Supports both plain text and structured JSON input.</p>
-        <p><strong>Plain Text Examples:</strong></p>
         <ul>
             <li>An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.</li>
             <li>A worker fell from scaffolding due to lack of fall protection measures in place.</li>
         </ul>
-        <p><strong>JSON Example:</strong></p>
-        <pre>{"scenario": "Worker fell from height", "location": "Construction site", "equipment": "Scaffolding", "conditions": "No safety harness"}</pre>
         """)
         with gr.Row():
             with gr.Column():
-                scenario_input = gr.Textbox(lines=8, label="Scenario Description (Plain Text or JSON)")
                 gr.Markdown("**Quick Examples:**")
                 with gr.Row():
                     ex1 = gr.Button("Solvent Exposure")
                     ex2 = gr.Button("Fall from Scaffolding")
-                    ex3 = gr.Button("JSON Example")
                     ex4 = gr.Button("Welding Fire Hazard")
                 temperature = gr.Slider(0.1, 1.0, 0.7, 0.1, label="Creativity (Temperature)")
                 max_len = gr.Slider(100, 500, 300, 50, label="Max Response Length")
@@ -225,24 +261,23 @@ def create_interface():
             with gr.Column():
                 cause_output = gr.Textbox(label="📝 Cause of Accident")
                 degree_output = gr.Textbox(label="📈 Degree of Injury")
-                with gr.Accordion("📊 Parsed Input", open=False):
-                    parsed_output = gr.Textbox(label="Parsed JSON Structure", lines=6)
                 with gr.Accordion("📄 Raw Model Outputs", open=False):
                     raw_output = gr.Textbox(label="Raw Responses", lines=12)
         predict_btn.click(
             fn=generate_prediction_ensemble,
             inputs=[scenario_input, max_len, temperature],
-            outputs=[cause_output, degree_output, raw_output, parsed_output]
         )
-        # Example functions
         ex1.click(fn=lambda: "An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.", outputs=scenario_input)
         ex2.click(fn=lambda: "A worker fell from scaffolding due to lack of fall protection measures in place.", outputs=scenario_input)
-        ex3.click(fn=lambda: '{"scenario": "Equipment malfunction during operation", "location": "Factory floor", "equipment": "Heavy machinery", "workers": "2 operators", "conditions": "Poor maintenance, inadequate training"}', outputs=scenario_input)
         ex4.click(fn=lambda: "During welding, flammable vapors ignited due to poor fire safety practices.", outputs=scenario_input)
-        gr.HTML("<p style='text-align:center;'>Built with Transformers + Hugging Face Models + Gradio</p>")
     return interface

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from peft import PeftModel
+# Configuration - Using better base models
 MODEL_PATHS = [
     "FrAnKu34t23/Construction_Risk_Prediction_Model_v3"
 ]
+# Better base model options - choose one based on your needs
+BASE_MODEL_ID = "microsoft/DialoGPT-medium"  # Better conversational model
+# Alternative options:
+# BASE_MODEL_ID = "gpt2-medium"  # Larger GPT-2
+# BASE_MODEL_ID = "microsoft/DialoGPT-large"  # Even better but slower
 models = []
 tokenizers = []
+# Initialize better models for analysis
 injury_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+# Use a more capable model for text analysis and reasoning
+analysis_model = pipeline(
+    "text-generation",
+    model="microsoft/DialoGPT-large",  # Better reasoning capabilities
+    device=0 if torch.cuda.is_available() else -1
+)
 def classify_injury_zero_shot(description):
     candidate_labels = [
         print(f"❌ Model loading failed: {e}")
         return False
 def format_input(scenario_text):
     scenario = scenario_text.strip()
     if not scenario.startswith(", "):
     except:
         return prompt
def parse_json_from_raw_output(raw_output):
    """Extract the first decodable JSON object embedded in a raw model output.

    The text is scanned for ``{`` characters and a JSON decode is attempted
    starting at each one, so nested objects and ``}`` characters inside string
    values are handled, and an undecodable ``{...}`` fragment early in the text
    does not hide a valid object that appears later.

    Args:
        raw_output: Text produced by a model; may contain arbitrary prose
            around the JSON object.

    Returns:
        The decoded JSON object (a ``dict``), or ``None`` when ``raw_output``
        is not a string or contains no decodable object.
    """
    if not isinstance(raw_output, str):
        return None

    decoder = json.JSONDecoder()
    start = raw_output.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete JSON value beginning at `start`
            # and ignores whatever text follows it.
            parsed, _ = decoder.raw_decode(raw_output, start)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next candidate.
            start = raw_output.find("{", start + 1)
        else:
            return parsed
    return None
def extract_structured_data_from_outputs(raw_outputs):
    """Collect the JSON payloads found in each model's raw output.

    Every entry of ``raw_outputs`` is passed to ``parse_json_from_raw_output``
    in order; results that are falsy (``None`` or an empty object) are dropped.

    Returns:
        A list of the parsed payloads, in the same order as their outputs.
    """
    parsed_candidates = (parse_json_from_raw_output(text) for text in raw_outputs)
    return [payload for payload in parsed_candidates if payload]
def analyze_with_advanced_hf_model(raw_outputs, zero_shot_injury, structured_data):
    """Summarise the ensemble outputs into a cause and an injury degree.

    Builds an analyst-style prompt from up to the first three raw model
    outputs, the zero-shot injury label and any structured JSON extracted from
    the outputs, then asks the module-level ``analysis_model`` pipeline to
    complete it.

    Args:
        raw_outputs: List of raw text outputs from the prediction models; may
            be empty.
        zero_shot_injury: Injury label suggested by the zero-shot classifier,
            used in the prompt and as the fallback injury degree.
        structured_data: List of JSON payloads parsed from ``raw_outputs``;
            may be empty.

    Returns:
        A string containing a ``Cause of Accident: ...`` line and a
        ``Degree of Injury: ...`` line. When the generated text lacks that
        format, or generation raises, the lines are built from
        ``structured_data`` and ``zero_shot_injury`` instead.
    """
    structured_info = ""
    if structured_data:
        structured_info = "\n\nStructured data extracted from models:\n"
        for i, data in enumerate(structured_data, 1):
            structured_info += f"Model {i}: {json.dumps(data, indent=2)}\n"

    # Pad to exactly three slots so an empty or short list cannot raise
    # IndexError while the prompt is being built.
    first_output, second_output, third_output = (list(raw_outputs or []) + ["", "", ""])[:3]

    prompt = f"""You are a workplace safety analyst. Below are raw text outputs from construction safety prediction models.
Your tasks:
- Compare and merge the model outputs
- Summarize the most plausible cause of accident in natural language
- Infer the degree of injury by considering all outputs and classifier suggestion
Classifier prediction for Degree of Injury: {zero_shot_injury}
Model Outputs:
{first_output}
{second_output}
{third_output}
{structured_info}
Based on this analysis, provide a concise response in this format:
Cause of Accident: [single clear sentence]
Degree of Injury: [Low/Medium/High]
Analysis:"""
    try:
        response = analysis_model(
            prompt,
            # Budget the *generated* tokens directly; a total max_length based
            # on the prompt's word count can be smaller than the prompt's
            # token count and leave no room for an answer.
            max_new_tokens=100,
            temperature=0.3,  # Lower temperature for more consistent analysis
            do_sample=True,
            # Ask for the continuation only, so the template lines in the
            # prompt can never be mistaken for the model's answer.
            return_full_text=False,
            pad_token_id=analysis_model.tokenizer.eos_token_id
        )
        generated_text = response[0]['generated_text']
        # Defensive: strip the prompt if the pipeline echoed it anyway.
        analysis_result = generated_text.replace(prompt, "").strip()

        # If the analysis doesn't contain the required format, create it
        if "Cause of Accident:" not in analysis_result:
            cause = "Multiple safety protocol violations identified"
            if structured_data:
                causes = [
                    data["Cause of Accident"]
                    for data in structured_data
                    if isinstance(data, dict) and "Cause of Accident" in data
                ]
                if causes:
                    cause = causes[0]  # Take the first cause found
            analysis_result = f"Cause of Accident: {cause}\nDegree of Injury: {zero_shot_injury}"
        return analysis_result
    except Exception as e:
        print("❌ Advanced HF model analysis failed:", e)
        # Fallback using structured data if available; the isinstance guard
        # mirrors the check in the branch above so a non-dict entry cannot
        # raise from inside the error handler.
        if structured_data and isinstance(structured_data[0], dict):
            first_data = structured_data[0]
            cause = first_data.get("Cause of Accident", "Safety protocol violation")
            injury = first_data.get("Degree of Injury", zero_shot_injury)
            return f"Cause of Accident: {cause}\nDegree of Injury: {injury}"
        return f"Cause of Accident: Unable to analyze due to technical error\nDegree of Injury: {zero_shot_injury}"
def generate_prediction_ensemble(scenario_text, max_length=300, temperature=0.7):
    """Run the full prediction pipeline for one scenario description.

    Formats the scenario into a prompt, collects the raw outputs of all
    models, classifies the injury degree zero-shot, extracts structured JSON
    from the raw outputs and asks the analysis model for a final cause and
    injury degree.

    Args:
        scenario_text: Free-text description of the scenario; ``None``, empty
            and whitespace-only values are rejected with a message.
        max_length: Forwarded to ``generate_all_model_outputs``.
        temperature: Forwarded to ``generate_all_model_outputs``.

    Returns:
        A 4-tuple ``(cause, injury, combined_raw, structured_display)`` of
        strings. On failure the first element is an error message and the
        third element carries the traceback text.
    """
    # Guard against None as well as blank text so the check itself cannot
    # raise before the try block is entered.
    if not scenario_text or not scenario_text.strip():
        return "❌ Please enter a scenario.", "", "", ""
    try:
        prompt = format_input(scenario_text)
        raw_outputs = generate_all_model_outputs(prompt, max_length, temperature)
        scenario_only = extract_scenario_from_prompt(prompt)
        injury_guess = classify_injury_zero_shot(scenario_only)

        # Extract structured JSON data from raw outputs
        structured_data = extract_structured_data_from_outputs(raw_outputs)

        # Use advanced HF model analysis (replacing Gemini)
        hf_analysis = analyze_with_advanced_hf_model(raw_outputs, injury_guess, structured_data)

        # Parse the analysis results
        match_cause = re.search(r"Cause of Accident\s*:\s*(.+)", hf_analysis)
        match_injury = re.search(r"Degree of Injury\s*:\s*(Low|Medium|High)", hf_analysis, re.IGNORECASE)
        cause = match_cause.group(1).strip() if match_cause else "Unable to determine cause"
        # Fall back to the classifier's label when the analysis text does not
        # name one of the three recognised degrees.
        injury = match_injury.group(1).strip().capitalize() if match_injury else injury_guess
        combined_raw = "\n\n".join(raw_outputs)

        # Format structured data for display
        structured_display = json.dumps(structured_data, indent=2) if structured_data else "No structured data found"
        return cause, injury, combined_raw, structured_display
    except Exception:
        # Top-level UI boundary: surface the traceback in the raw-output slot
        # instead of letting the exception propagate to the interface.
        return "❌ Prediction failed.", "", traceback.format_exc(), ""
 def create_interface():
     with gr.Blocks(title="Workplace Safety Risk Predictor") as interface:
         gr.HTML("""
+        <h1>🚧 Workplace Safety Risk Prediction Model (Enhanced Ensemble)</h1>
+        <p>Enter a construction scenario to analyze possible risks. Uses advanced language models for better analysis.</p>
+        <p><strong>Expected JSON Output Format:</strong></p>
+        <pre>{"Cause of Accident": "...", "Degree of Injury": "High/Medium/Low", "Hazards": ["...", "..."]}</pre>
+        <p><strong>Examples:</strong></p>
         <ul>
             <li>An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.</li>
             <li>A worker fell from scaffolding due to lack of fall protection measures in place.</li>
+            <li>While operating a crane, the load became unstable and struck a nearby worker.</li>
+            <li>During welding, flammable vapors ignited due to poor fire safety practices.</li>
         </ul>
         """)
         with gr.Row():
             with gr.Column():
+                scenario_input = gr.Textbox(lines=5, label="Scenario Description")
                 gr.Markdown("**Quick Examples:**")
                 with gr.Row():
                     ex1 = gr.Button("Solvent Exposure")
                     ex2 = gr.Button("Fall from Scaffolding")
+                    ex3 = gr.Button("Crane Load Accident")
                     ex4 = gr.Button("Welding Fire Hazard")
                 temperature = gr.Slider(0.1, 1.0, 0.7, 0.1, label="Creativity (Temperature)")
                 max_len = gr.Slider(100, 500, 300, 50, label="Max Response Length")
             with gr.Column():
                 cause_output = gr.Textbox(label="📝 Cause of Accident")
                 degree_output = gr.Textbox(label="📈 Degree of Injury")
+                with gr.Accordion("📊 Extracted Structured Data", open=False):
+                    structured_output = gr.Textbox(label="JSON Data from Models", lines=8)
                 with gr.Accordion("📄 Raw Model Outputs", open=False):
                     raw_output = gr.Textbox(label="Raw Responses", lines=12)
         predict_btn.click(
             fn=generate_prediction_ensemble,
             inputs=[scenario_input, max_len, temperature],
+            outputs=[cause_output, degree_output, raw_output, structured_output]
         )
         ex1.click(fn=lambda: "An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.", outputs=scenario_input)
         ex2.click(fn=lambda: "A worker fell from scaffolding due to lack of fall protection measures in place.", outputs=scenario_input)
+        ex3.click(fn=lambda: "While operating a crane, the load became unstable and struck a nearby worker.", outputs=scenario_input)
         ex4.click(fn=lambda: "During welding, flammable vapors ignited due to poor fire safety practices.", outputs=scenario_input)
+        gr.HTML("<p style='text-align:center;'>Built with Advanced Transformers + Enhanced Analysis + Gradio</p>")
     return interface