Spaces:

FrAnKu34t23
/

ConstructionRiskPredict

Sleeping

App Files Files Community

FrAnKu34t23 committed on Jul 27, 2025

Commit

823b0ef

verified ·

1 Parent(s): 65e487e

Update app.py

Browse files

Files changed (1) hide show

app.py +246 -71

app.py CHANGED Viewed

@@ -63,118 +63,228 @@ def format_input(scenario_text):
     return formatted_prompt
-return formatted_prompt
 def parse_json_response(response_text):
-    """Extract and parse JSON from model response"""
     try:
         # First, try to parse the entire response as JSON
-        if response_text.strip().startswith('{') and response_text.strip().endswith('}'):
-            return json.loads(response_text.strip())
-        # If that fails, look for JSON pattern in the text
-        json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
-        matches = re.findall(json_pattern, response_text, re.DOTALL)
-        for match in matches:
-            try:
-                return json.loads(match)
-            except:
-                continue
-        # If no valid JSON found, return structured error
-        return {
-            "Hazards": ["Unable to parse response"],
-            "Cause of Accident": "Model output parsing failed",
-            "Degree of Injury": "Unknown",
-            "raw_response": response_text
-        }
     except Exception as e:
         return {
-            "Hazards": [f"Parsing error: {str(e)}"],
-            "Cause of Accident": "JSON parsing failed",
             "Degree of Injury": "Unknown",
-            "raw_response": response_text
         }
 def generate_prediction(scenario_text, max_length=300, temperature=0.7):
     if model is None or tokenizer is None:
         return "❌ Model not loaded. Please wait for initialization.", "", "", "", ""
     try:
         # Format the input
         formatted_prompt = format_input(scenario_text)
-        full_prompt = f"{formatted_prompt}{tokenizer.eos_token}"
         # Tokenize
         inputs = tokenizer(
-            full_prompt,
             return_tensors="pt",
             truncation=True,
             max_length=512,
         device = next(model.parameters()).device
         inputs = {k: v.to(device) for k, v in inputs.items()}
-        # Generate response
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_length=len(inputs['input_ids'][0]) + max_length,
-                temperature=temperature,
                 do_sample=True,
-                top_p=0.9,
-                top_k=50,
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id,
                 num_return_sequences=1,
-                repetition_penalty=1.1,
                 early_stopping=True
             )
         # Decode response
-        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
         # Extract generated part
-        input_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=False)
         if full_response.startswith(input_text):
             generated_part = full_response[len(input_text):].strip()
         else:
             generated_part = full_response.strip()
-        # Clean up response
-        if generated_part.startswith(tokenizer.eos_token):
-            generated_part = generated_part[len(tokenizer.eos_token):].strip()
-        if generated_part.endswith(tokenizer.eos_token):
-            generated_part = generated_part[:-len(tokenizer.eos_token)].strip()
         # Parse the JSON response
         parsed_response = parse_json_response(generated_part)
-        # Extract individual components
         hazards = parsed_response.get("Hazards", [])
-        cause = parsed_response.get("Cause of Accident", "Not specified")
-        degree = parsed_response.get("Degree of Injury", "Not specified")
         # Format hazards for display
-        hazards_display = ", ".join(hazards) if isinstance(hazards, list) else str(hazards)
         # Create formatted output
-        formatted_output = json.dumps(parsed_response, indent=2, ensure_ascii=False)
         return hazards_display, cause, degree, formatted_output, generated_part
     except Exception as e:
         error_msg = f"❌ Error generating prediction: {str(e)}"
-        return error_msg, "", "", "", ""
 def create_interface():
     """Create the Gradio interface"""
-     # Custom CSS for better styling
     css = """
     .gradio-container {
         font-family: 'Arial', sans-serif;
@@ -209,103 +319,168 @@ def create_interface():
     """
     with gr.Blocks(css=css, title="Workplace Safety Risk Predictor") as interface:
                         temperature = gr.Slider(
                             minimum=0.1,
                             maximum=1.0,
-                            value=0.7,
                             step=0.1,
                             label="Creativity (Temperature)",
-                            info="Higher values = more creative responses"
                         )
                     with gr.Column():
                         max_length = gr.Slider(
                             minimum=100,
                             maximum=500,
-                            value=300,
                             step=50,
                             label="Max Response Length",
                             info="Maximum length of generated response"
                     with gr.Column():
                         hazards_output = gr.Textbox(
                             label="🚨 Identified Hazards",
-                            info="Potential hazards identified in the scenario"
                         )
                         cause_output = gr.Textbox(
                             label="🔍 Cause of Accident",
-                            info="Primary cause classification"
                         )
                         degree_output = gr.Textbox(
                             label="📈 Degree of Injury",
-                            info="Severity assessment"
                         )
                 with gr.Accordion("📋 Detailed JSON Output", open=False):
                 with gr.Accordion("🔍 Raw Model Output", open=False):
                     raw_output = gr.Textbox(
                         label="Raw Response",
-                        lines=3,
-                        info="Unprocessed model output"
                     )
         # Example scenarios
         gr.HTML("<h3>💡 Example Scenarios</h3>")
         with gr.Row():
-            example1 = gr.Button("Power Press Accident")
-            example2 = gr.Button("Fall from Ladder")
-            example3 = gr.Button("Chemical Exposure")
-            example4 = gr.Button("Lifting Injury")
         # Event handlers
         predict_btn.click(
             outputs=[hazards_output, cause_output, degree_output, json_output, raw_output]
         )
-        # Example scenarios
         example1.click(
-            lambda: "an employee was operating a 400 ton mechanical power press. The press was actuated while the employee's right hand was in the point of operation. The employee's fingers were amputated.",
             outputs=scenario_input
         )
         example2.click(
-            lambda: "an employee was using a ladder to access high shelves. The ladder was not properly secured and the employee fell from a height of 8 feet, resulting in head injuries.",
             outputs=scenario_input
         )
         example3.click(
-            lambda: "an employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.",
             outputs=scenario_input
         )
         example4.click(
-            lambda: "an employee was manually lifting heavy boxes weighing over 50 pounds without proper lifting technique or mechanical aids. The employee strained their back.",
             outputs=scenario_input
         )
         gr.HTML("""
         <div style="text-align: center; margin-top: 30px; color: #666;">
             <p>Built with ❤️ using Hugging Face Transformers and Gradio</p>
-            <p>Model: <a href="https://huggingface.co/FrAnKu34t23/Construction_Mistral_Risk_Prediction_Model_v3">Construction_Mistral_Risk_Prediction_Model_v3</a></p>
         </div>
         """)
         app.launch(
             server_name="0.0.0.0",
             server_port=7860,
-            share=True
         )
 else:
     print("❌ Failed to load model. App cannot start.")
     # Create a simple error interface
     with gr.Blocks() as error_app:
-        gr.HTML("<h1>❌ Model Loading Failed</h1><p>Unable to load the safety prediction model.</p>")
     if __name__ == "__main__":
         error_app.launch()

     return formatted_prompt
+def _requote_array_items(match):
+    """Rebuild one flat ``[...]`` match so that every item is valid JSON.
+
+    Helper for ``clean_json_string``. Items are split on commas that sit
+    outside quoted strings. Double-quoted items are kept as they are,
+    single-quoted items are re-quoted with double quotes, JSON literals
+    (numbers, ``true``, ``false``, ``null``) stay bare, and any other bare
+    text is emitted as a JSON string. Arrays that contain objects are
+    returned unchanged.
+    """
+    inner = match.group(1)
+    if '{' in inner or '}' in inner:
+        # Quoting a whole object would break it; leave such arrays alone.
+        return match.group(0)
+    items = []
+    # Each token is one quoted string or one run of non-comma characters, so
+    # commas inside quotes do not split an item.
+    for token in re.findall(r'\s*(?:"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'|[^,]+)', inner):
+        item = token.strip()
+        if not item:
+            continue
+        if item.startswith('"'):
+            items.append(item)
+        elif len(item) >= 2 and item[0] == item[-1] == "'":
+            items.append(json.dumps(item[1:-1].replace("\\'", "'")))
+        elif re.fullmatch(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?|true|false|null', item):
+            items.append(item)
+        else:
+            items.append(json.dumps(item))
+    return '[' + ', '.join(items) + ']'
+def clean_json_string(text):
+    """Clean and fix common JSON formatting issues.
+
+    Applies a fixed sequence of regex repairs: single-quoted keys and
+    string values become double-quoted, bare identifier keys get quotes,
+    trailing commas before ``}`` or ``]`` are dropped, and the items of
+    flat arrays are quoted where needed.
+
+    The repairs are heuristics, so the result is not guaranteed to be
+    valid JSON; callers still need to handle ``json.JSONDecodeError``.
+
+    Args:
+        text: JSON-like text to repair.
+
+    Returns:
+        The repaired text.
+    """
+    # Remove any leading/trailing whitespace
+    text = text.strip()
+    # Fix common JSON issues
+    # Replace single quotes with double quotes (but be careful about apostrophes)
+    text = re.sub(r"'([^']*)':", r'"\1":', text)  # Fix keys
+    text = re.sub(r":\s*'([^']*)'", r': "\1"', text)  # Fix string values
+    # Fix missing quotes around keys
+    text = re.sub(r'([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', text)
+    # Fix trailing commas
+    text = re.sub(r',(\s*[}\]])', r'\1', text)
+    # Ensure arrays are properly formatted
+    text = re.sub(r'\[\s*([^[\]]+)\s*\]', _requote_array_items, text)
+    return text
+def extract_structured_info(text):
+    """Extract structured information even if JSON parsing fails.
+
+    Scans free text with regular expressions for the three fields the app
+    displays.
+
+    Hazards come from an explicit ``Hazards: [...]`` or ``Hazards: ...``
+    field when one is present; otherwise every occurrence of a known
+    hazard label in the text is collected. Entries equal to "NOMA" are
+    dropped and duplicates are removed (case-insensitively).
+
+    Args:
+        text: Raw model output to scan. ``None`` is treated as empty text.
+
+    Returns:
+        dict with "Hazards" (list of str), "Cause of Accident" (str) and
+        "Degree of Injury" (str). Fields that are not found keep their
+        defaults: an empty list and "Not specified".
+    """
+    text = text or ""
+    result = {
+        "Hazards": [],
+        "Cause of Accident": "Not specified",
+        "Degree of Injury": "Not specified"
+    }
+    # Try to extract hazards: an explicit field wins over loose keywords
+    keyed_hazard_patterns = [
+        r"Hazards[\"']?\s*:\s*\[([^\]]+)\]",
+        r"Hazards[\"']?\s*:\s*([^,}]+)",
+    ]
+    for pattern in keyed_hazard_patterns:
+        match = re.search(pattern, text, re.IGNORECASE)
+        if match:
+            # Clean and split hazards
+            hazards = [h.strip().strip('"\'') for h in match.group(1).split(',')]
+            break
+    else:
+        # No explicit field: collect every known label, not just the first
+        hazards = re.findall(r"MATERIAL HANDLING|FALL PROTECTION|VALVE EXPLOSION|NOMA", text, re.IGNORECASE)
+    seen = set()
+    for hazard in hazards:
+        key = hazard.upper()
+        if hazard and key != 'NOMA' and key not in seen:
+            seen.add(key)
+            result["Hazards"].append(hazard)
+    # Extract cause
+    cause_patterns = [
+        r"Cause of Accident[\"']?\s*:\s*[\"']([^\"']+)[\"']",
+        r"Other caused by ([^,}]+)",
+        r"Cause[\"']?\s*:\s*[\"']([^\"']+)[\"']"
+    ]
+    for pattern in cause_patterns:
+        match = re.search(pattern, text, re.IGNORECASE)
+        if match:
+            result["Cause of Accident"] = match.group(1).strip()
+            break
+    # Extract degree of injury
+    degree_patterns = [
+        r"Degree of Injury[\"']?\s*:\s*[\"']([^\"']+)[\"']",
+        # Whole words only, so "low" inside "below" is not a severity
+        r"\b(?:High|Medium|Low|Severe|Minor|Fatal)\b",
+        r"Injury[\"']?\s*:\s*[\"']([^\"']+)[\"']"
+    ]
+    for pattern in degree_patterns:
+        match = re.search(pattern, text, re.IGNORECASE)
+        if match:
+            # The keyword pattern has no capture group; group(1) would raise
+            # IndexError there, so fall back to the whole match.
+            value = match.group(1) if match.lastindex else match.group(0)
+            result["Degree of Injury"] = value.strip()
+            break
+    return result
 def parse_json_response(response_text):
+    """Extract and parse JSON from model response with better error handling.
+
+    Strategies are tried in order and the first success is returned:
+      1. The stripped reply as one JSON object, verbatim and then after
+         ``clean_json_string`` repairs.
+      2. Every ``{...}`` fragment found inside the reply, verbatim and then
+         repaired.
+      3. ``extract_structured_info`` on the whole reply; that result also
+         carries "raw_response" and "parsing_method" keys.
+
+    Any unexpected exception (for example a non-string ``response_text``)
+    produces a placeholder dict with "raw_response" and "error" keys
+    instead of propagating.
+
+    Args:
+        response_text: Text generated by the model.
+
+    Returns:
+        The parsed dict, or one of the fallback dicts described above.
+    """
     try:
+        # Clean the response text
+        cleaned_text = response_text.strip()
         # First, try to parse the entire response as JSON
+        if cleaned_text.startswith('{') and cleaned_text.endswith('}'):
+            try:
+                return json.loads(cleaned_text)
+            except json.JSONDecodeError:
+                # Try cleaning the JSON
+                cleaned_json = clean_json_string(cleaned_text)
+                try:
+                    return json.loads(cleaned_json)
+                except json.JSONDecodeError:
+                    pass
+        # Look for JSON-like patterns in the text
+        json_patterns = [
+            r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}',
+            r'\{.*?\}',
+        ]
+        for pattern in json_patterns:
+            matches = re.findall(pattern, response_text, re.DOTALL)
+            for match in matches:
+                # Try the fragment verbatim first: the repair heuristics are
+                # lossy and can break a fragment that is already valid JSON.
+                try:
+                    return json.loads(match)
+                except json.JSONDecodeError:
+                    pass
+                try:
+                    return json.loads(clean_json_string(match))
+                except json.JSONDecodeError:
+                    continue
+        # If JSON parsing completely fails, extract structured info
+        structured_info = extract_structured_info(response_text)
+        structured_info["raw_response"] = response_text
+        structured_info["parsing_method"] = "regex_extraction"
+        return structured_info
     except Exception as e:
+        # Last resort: return basic structure with error info
         return {
+            "Hazards": ["Parsing failed - check raw output"],
+            "Cause of Accident": f"Error: {str(e)[:100]}...",
             "Degree of Injury": "Unknown",
+            "raw_response": response_text,
+            "error": str(e)
         }
 def generate_prediction(scenario_text, max_length=300, temperature=0.7):
+    """Generate workplace safety prediction.
+
+    Formats the scenario with ``format_input``, samples a completion from
+    the module-level ``model``/``tokenizer``, parses it with
+    ``parse_json_response`` and prepares the values shown in the UI.
+
+    Args:
+        scenario_text: Free-text description of the incident.
+        max_length: Added to the prompt's token count to form the
+            ``max_length`` passed to ``model.generate``.
+        temperature: Sampling temperature; values below 0.3 are raised
+            to 0.3.
+
+    Returns:
+        A 5-tuple ``(hazards_display, cause, degree, formatted_output,
+        generated_part)``. When the model is not loaded, the scenario is
+        empty, or generation raises, the first element is an error message
+        and the middle three are empty strings; the last element is empty,
+        or the exception text when generation raised.
+    """
+    global model, tokenizer
     if model is None or tokenizer is None:
         return "❌ Model not loaded. Please wait for initialization.", "", "", "", ""
+    # Treat None like an empty scenario instead of raising on .strip()
+    if not scenario_text or not scenario_text.strip():
+        return "❌ Please enter a workplace scenario to analyze.", "", "", "", ""
     try:
         # Format the input
         formatted_prompt = format_input(scenario_text)
         # Tokenize
         inputs = tokenizer(
+            formatted_prompt,
             return_tensors="pt",
             truncation=True,
             max_length=512,
+            padding=False
+        )
+        # Move to same device as model
         device = next(model.parameters()).device
         inputs = {k: v.to(device) for k, v in inputs.items()}
+        # Generate response with more conservative settings for better JSON
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_length=len(inputs['input_ids'][0]) + max_length,
+                temperature=max(0.3, temperature),  # Floor: requests below 0.3 are raised to 0.3
                 do_sample=True,
+                top_p=0.8,  # Slightly more conservative
+                top_k=40,   # Reduced for consistency
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id,
                 num_return_sequences=1,
+                repetition_penalty=1.2,  # Slightly higher to avoid repetition
                 early_stopping=True
             )
         # Decode response
+        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # Extract generated part
+        input_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)
         if full_response.startswith(input_text):
             generated_part = full_response[len(input_text):].strip()
         else:
             generated_part = full_response.strip()
+        # Clean up common artifacts
+        generated_part = re.sub(r'^[,\s]*', '', generated_part)  # Remove leading commas/spaces
+        generated_part = re.sub(r'<[^>]*>', '', generated_part)  # Remove any HTML-like tags
         # Parse the JSON response
         parsed_response = parse_json_response(generated_part)
+        # Extract individual components with better defaults
         hazards = parsed_response.get("Hazards", [])
+        if not hazards:
+            hazards = ["No specific hazards identified"]
+        # `or` also replaces null/empty values, not only missing keys
+        cause = parsed_response.get("Cause of Accident") or "Analysis incomplete"
+        degree = parsed_response.get("Degree of Injury") or "Assessment needed"
         # Format hazards for display
+        if isinstance(hazards, list):
+            hazards_display = ", ".join(str(h) for h in hazards if h)
+        else:
+            hazards_display = str(hazards)
         # Create formatted output
+        display_response = {
+            "Hazards": hazards,
+            "Cause of Accident": cause,
+            "Degree of Injury": degree
+        }
+        # Add metadata if available
+        if "parsing_method" in parsed_response:
+            display_response["Parsing Method"] = parsed_response["parsing_method"]
+        formatted_output = json.dumps(display_response, indent=2, ensure_ascii=False)
         return hazards_display, cause, degree, formatted_output, generated_part
     except Exception as e:
         error_msg = f"❌ Error generating prediction: {str(e)}"
+        print(f"Generation error: {e}")  # For debugging
+        return error_msg, "", "", "", str(e)
 def create_interface():
     """Create the Gradio interface"""
+    # Custom CSS for better styling
     css = """
     .gradio-container {
         font-family: 'Arial', sans-serif;
     """
     with gr.Blocks(css=css, title="Workplace Safety Risk Predictor") as interface:
+        gr.HTML("""
+        <div class="header">
+            <h1>🚧 Workplace Safety Risk Prediction Model</h1>
+            <p>Analyze workplace scenarios to identify potential hazards, causes, and injury severity</p>
+        </div>
+        """)
+        with gr.Row():
+            with gr.Column(scale=2):
+                gr.HTML("<h3>📝 Enter Workplace Scenario</h3>")
+                scenario_input = gr.Textbox(
+                    lines=5,
+                    placeholder="Example: an employee was operating a 400 ton mechanical power press. The press was actuated while the employee's right hand was in the point of operation...",
+                    label="Workplace Incident Description",
+                    info="Describe the workplace scenario you want to analyze"
+                )
+                with gr.Row():
+                    with gr.Column():
                         temperature = gr.Slider(
                             minimum=0.1,
                             maximum=1.0,
+                            value=0.5,  # Lower default for more consistent output
                             step=0.1,
                             label="Creativity (Temperature)",
+                            info="Lower values = more consistent responses"
                         )
                     with gr.Column():
                         max_length = gr.Slider(
                             minimum=100,
                             maximum=500,
+                            value=250,  # Slightly lower default
                             step=50,
                             label="Max Response Length",
                             info="Maximum length of generated response"
+                        )
+                predict_btn = gr.Button("🔍 Analyze Scenario", variant="primary", size="lg")
+                gr.HTML("""
+                <div class="warning-box">
+                    <strong>⚠️ Note:</strong> This is an AI model for educational purposes.
+                    Always consult safety professionals for real workplace safety assessments.
+                </div>
+                """)
+            with gr.Column(scale=2):
+                gr.HTML("<h3>📊 Analysis Results</h3>")
+                with gr.Row():
                     with gr.Column():
                         hazards_output = gr.Textbox(
                             label="🚨 Identified Hazards",
+                            info="Potential hazards identified in the scenario",
+                            interactive=False
                         )
                         cause_output = gr.Textbox(
                             label="🔍 Cause of Accident",
+                            info="Primary cause classification",
+                            interactive=False
                         )
                         degree_output = gr.Textbox(
                             label="📈 Degree of Injury",
+                            info="Severity assessment",
+                            interactive=False
                         )
                 with gr.Accordion("📋 Detailed JSON Output", open=False):
+                    json_output = gr.Code(
+                        label="Structured Response",
+                        language="json"
+                    )
                 with gr.Accordion("🔍 Raw Model Output", open=False):
                     raw_output = gr.Textbox(
                         label="Raw Response",
+                        lines=5,
+                        info="Unprocessed model output for debugging"
                     )
         # Example scenarios
         gr.HTML("<h3>💡 Example Scenarios</h3>")
         with gr.Row():
+            example1 = gr.Button("Power Press Accident", size="sm")
+            example2 = gr.Button("Fall from Ladder", size="sm")
+            example3 = gr.Button("Chemical Exposure", size="sm")
+            example4 = gr.Button("Lifting Injury", size="sm")
         # Event handlers
         predict_btn.click(
+            fn=generate_prediction,
+            inputs=[scenario_input, max_length, temperature],
             outputs=[hazards_output, cause_output, degree_output, json_output, raw_output]
         )
+        # Example scenarios with better formatting
         example1.click(
+            lambda: "An employee was operating a 400 ton mechanical power press. The press was actuated while the employee's right hand was in the point of operation. The employee's fingers were amputated.",
             outputs=scenario_input
         )
         example2.click(
+            lambda: "An employee was using a ladder to access high shelves. The ladder was not properly secured and the employee fell from a height of 8 feet, resulting in head injuries.",
             outputs=scenario_input
         )
         example3.click(
+            lambda: "An employee was working with chemical solvents without proper ventilation. The employee inhaled toxic fumes and experienced respiratory problems.",
             outputs=scenario_input
         )
         example4.click(
+            lambda: "An employee was manually lifting heavy boxes weighing over 50 pounds without proper lifting technique or mechanical aids. The employee strained their back.",
             outputs=scenario_input
         )
         gr.HTML("""
         <div style="text-align: center; margin-top: 30px; color: #666;">
             <p>Built with ❤️ using Hugging Face Transformers and Gradio</p>
+            <p>Model: <a href="https://huggingface.co/FrAnKu34t23/Construction_Mistral_Risk_Prediction_Model_v3" target="_blank">Construction_Mistral_Risk_Prediction_Model_v3</a></p>
         </div>
         """)
+    return interface
+# Initialize the model when the app starts
+# load_model() is defined elsewhere in app.py; its return value is used
+# below as a success flag that selects the real UI or the error page.
+print("🚀 Initializing Workplace Safety Risk Prediction App...")
+model_loaded = load_model()
+if model_loaded:
+    print("✅ App ready!")
+    # Create and launch the interface
+    app = create_interface()
+    # The interface is built on import, but only launched when run as a script
+    if __name__ == "__main__":
         app.launch(
             server_name="0.0.0.0",
             server_port=7860,
+            # NOTE(review): share links are reportedly not created when running
+            # on Hugging Face Spaces — confirm whether share=True is needed.
+            share=True,
+            show_error=True  # Better error display
         )
 else:
     print("❌ Failed to load model. App cannot start.")
     # Create a simple error interface
     with gr.Blocks() as error_app:
+        gr.HTML("""
+        <div class="error-box">
+            <h1>❌ Model Loading Failed</h1>
+            <p>Unable to load the safety prediction model. Please check:</p>
+            <ul>
+                <li>Internet connection for model download</li>
+                <li>Available system memory</li>
+                <li>Model repository accessibility</li>
+            </ul>
+        </div>
+        """)
     if __name__ == "__main__":
         error_app.launch()