Commit 08f8055 (verified) by FrAnKu34t23 · Parent: e5c6abe

Update app.py

Files changed (1):
app.py (+20 -12)
app.py CHANGED
@@ -79,28 +79,36 @@ def generate_single_model_output(model, tokenizer, prompt, max_length=300, tempe
     return tokenizer.decode(output[0], skip_special_tokens=True).strip()
 
 # === ANALYSIS WITH FLAN-T5 ===
+def extract_json_only(text):
+    """Extract just the first JSON object from model text output."""
+    pattern = r'\{(?:[^{}]|"[^"]*")*\}'
+    matches = re.findall(pattern, text, re.DOTALL)
+    return matches[0] if matches else ""
+
 def analyze_with_cpu_model(raw_outputs, zero_shot_injury):
-    summary = ""
+    # Only extract JSON from each model output
+    json_blobs = []
     for i, text in enumerate(raw_outputs):
-        summary += f"Model {i+1} raw output:\n{text.strip()}\n\n"
+        json_part = extract_json_only(text)
+        if json_part:
+            json_blobs.append(f"Model {i+1} JSON:\n{json_part}")
+
+    summary = "\n\n".join(json_blobs)
 
     prompt = (
-        f"The following are raw outputs from multiple workplace hazard prediction models:\n\n"
-        f"{summary}\n"
-        f"A separate zero-shot classifier predicted the degree of injury as: {zero_shot_injury}.\n\n"
-        f"Please analyze all outputs (especially the structured JSON) and summarize the most plausible:\n"
-        f"- Cause of the accident (in natural language, not just copied)\n"
-        f"- Degree of Injury (Low, Medium, High)\n\n"
-        f"Return only in the format:\n"
-        f"Cause of Accident: ...\n"
-        f"Degree of Injury: ..."
+        f"The following are JSON outputs from multiple hazard prediction models:\n\n"
+        f"{summary}\n\n"
+        f"A separate classifier predicted this injury severity: {zero_shot_injury}.\n\n"
+        f"Please analyze all JSON outputs and return:\n"
+        f"Cause of Accident: <natural language summary of the most likely cause>\n"
+        f"Degree of Injury: <Low | Medium | High>"
     )
 
     inputs = flan_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to("cpu")
     with torch.no_grad():
         output = flan_model.generate(
             **inputs,
-            max_length=256,
+            max_length=128,
             temperature=0.5,
             top_p=0.9,
             do_sample=True
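
For readers skimming the diff, here is a minimal standalone sketch of the new extract_json_only helper. It assumes import re appears near the top of app.py (the hunk does not show the import block), and the sample string below is hypothetical:

    import re

    def extract_json_only(text):
        """Extract just the first JSON object from model text output."""
        # Non-recursive pattern: matches one flat brace-delimited object.
        # The quoted-string alternative lets '{' inside string values pass
        # through, but nested JSON objects are not handled.
        pattern = r'\{(?:[^{}]|"[^"]*")*\}'
        matches = re.findall(pattern, text, re.DOTALL)
        return matches[0] if matches else ""

    # Hypothetical raw model output: prose wrapped around one JSON object.
    raw = 'Model answer: {"cause": "fall from ladder", "degree": "Medium"} Done.'
    print(extract_json_only(raw))
    # prints: {"cause": "fall from ladder", "degree": "Medium"}

If a model ever emits nested JSON, the match stops at the first closing brace; something like json.JSONDecoder().raw_decode, scanning from the first '{', would be a more robust alternative.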