Spaces:

Shivangguptasih
/

simplifier

Runtime error

App Files Files Community

Shivangguptasih committed on Oct 2, 2025

Commit

624eb07

verified ·

1 Parent(s): f2ded98

Create app.py

Browse files

Files changed (1) hide show

app.py +170 -0

app.py ADDED Viewed

	@@ -0,0 +1,170 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import Optional
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import re
+# ASGI application object; the route decorators further down register handlers on it.
+app = FastAPI(title="Caption Simplifier API")
+class RobustSimplifier:
+    """Shorten a caption with phrase rules, falling back to a seq2seq model.
+
+    ``simplify`` is the entry point: it tries the rule-based rewrite first and
+    only asks the model when the rule output fails the quality checks.
+    """
+    # Directory the tokenizer and model weights are loaded from.
+    MODEL_DIR = "./model"
+    # Task prefix prepended to the model input (and stripped if echoed back).
+    PROMPT_PREFIX = "simplify: "
+    # Maximum number of words kept by the rule-based path.
+    MAX_RULE_WORDS = 7
+    # A candidate may be at most this fraction of the original's length.
+    MAX_LENGTH_RATIO = 0.7
+    # Candidates shorter than this many characters are rejected.
+    MIN_SIMPLIFIED_CHARS = 3
+    # More than this many words absent from the original counts as hallucination.
+    MAX_NEW_WORDS = 2
+    # Phrase -> replacement; keys are lowercase because input is lowercased first.
+    DEFAULT_RULES = {
+        "implementation of": "",
+        "utilization of": "",
+        "revolutionized": "changed",
+        "enhanced": "improved",
+        "launched": "started",
+        "significant": "big",
+        "remarkable": "great",
+        "immediate": "urgent",
+        "breakthrough": "finding",
+        "methodologies": "methods",
+        "artificial intelligence": "AI",
+        "data processing": "data work",
+        "medical attention": "medical help",
+        "cancer treatment": "cancer care",
+        "quantum physics": "quantum science",
+        "has revolutionized": "changed",
+        "has enhanced": "improved",
+        "has launched": "started",
+        "have discovered": "found",
+        "have made": "created",
+        "needs immediate": "needs urgent",
+        "the government": "government",
+        "the researchers": "researchers",
+        "the scientists": "scientists",
+        "the doctors": "doctors",
+        "the patient": "patient",
+    }
+    # Function words dropped from the rule-based output.
+    DEFAULT_STOPWORDS = frozenset({
+        "the", "a", "an", "has", "have", "been", "is", "are", "was", "were",
+        "of", "in", "on", "at", "by", "for", "with", "to", "from",
+    })
+    # Fixed phrases that mark a model output as hallucinated
+    # (presumably collected from observed bad generations -- TODO confirm).
+    HALLUCINATION_PATTERNS = (
+        "disabilities", "cancer cells are more", "more susceptible to",
+        "simplify the rules", "than cancer cells", "government rules",
+    )
+    def __init__(self):
+        """Load the tokenizer and model from ``MODEL_DIR`` and set up the rules."""
+        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_DIR)
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.MODEL_DIR)
+        # Per-instance copies so callers can adjust them without touching the defaults.
+        self.simplification_rules = dict(self.DEFAULT_RULES)
+        self.words_to_remove = set(self.DEFAULT_STOPWORDS)
+    def simplify(self, text):
+        """Return a simplified form of ``text``.
+
+        The rule-based result is returned when it passes
+        ``is_good_simplification``. Otherwise the model result is returned if
+        it passes the same check and is not flagged by ``has_hallucination``.
+        If neither qualifies, the rule-based result is returned as a fallback.
+        """
+        if not text.strip():
+            # Blank input can never yield an acceptable candidate; skip the model call.
+            return ""
+        rule_result = self.rule_based_simplify(text)
+        if self.is_good_simplification(text, rule_result):
+            return rule_result
+        model_result = self.get_model_simplification(text)
+        if self.is_good_simplification(text, model_result) and not self.has_hallucination(text, model_result):
+            return model_result
+        return rule_result
+    @staticmethod
+    def _build_rule_pattern(rules):
+        """Return a regex matching any rule phrase as a whole word, or None if there are none."""
+        # Longest phrases first so "has launched" wins over the shorter "launched".
+        phrases = sorted((phrase for phrase in rules if phrase), key=len, reverse=True)
+        if not phrases:
+            return None
+        alternation = "|".join(re.escape(phrase) for phrase in phrases)
+        # Lookarounds keep "significant" from matching inside "significantly".
+        return re.compile(r"(?<!\w)(?:" + alternation + r")(?!\w)")
+    def rule_based_simplify(self, text):
+        """Lowercase ``text``, apply the phrase rules, drop stopwords and cap the length.
+
+        Returns at most ``MAX_RULE_WORDS`` words joined by single spaces with
+        the first character upper-cased, or ``""`` when nothing is left.
+        """
+        lowered = text.lower()
+        rules = self.simplification_rules
+        # Built per call so later edits to ``simplification_rules`` take effect.
+        pattern = self._build_rule_pattern(rules)
+        if pattern is not None:
+            lowered = pattern.sub(lambda match: rules[match.group(0)], lowered)
+        kept = [word for word in lowered.split() if word not in self.words_to_remove]
+        result = " ".join(kept[:self.MAX_RULE_WORDS])
+        if result:
+            result = result[0].upper() + result[1:]
+        return result
+    def get_model_simplification(self, text):
+        """Return the model's simplification of ``text``, or ``""`` if generation fails."""
+        try:
+            inputs = self.tokenizer(f"{self.PROMPT_PREFIX}{text}", return_tensors="pt")
+            outputs = self.model.generate(
+                inputs.input_ids,
+                max_length=24,
+                num_beams=1,
+                do_sample=False
+            )
+            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        except Exception:
+            # Best-effort: the caller falls back to the rule-based result, but the
+            # failure is logged; KeyboardInterrupt/SystemExit are no longer swallowed.
+            import logging
+            logging.getLogger(__name__).exception("Model simplification failed")
+            return ""
+        result = result.strip()
+        # Strip the task prefix if the output starts with it.
+        if result.startswith(self.PROMPT_PREFIX):
+            result = result[len(self.PROMPT_PREFIX):]
+        return result.strip()
+    def is_good_simplification(self, original, simplified):
+        """Return True when ``simplified`` is an acceptable shortening of ``original``.
+
+        It must be at least ``MIN_SIMPLIFIED_CHARS`` characters, strictly shorter
+        than the original, no longer than ``MAX_LENGTH_RATIO`` of its length, and
+        consist only of ASCII letters and whitespace.
+        """
+        if len(simplified) < self.MIN_SIMPLIFIED_CHARS:
+            return False
+        if len(simplified) >= len(original):
+            return False
+        if len(simplified) > len(original) * self.MAX_LENGTH_RATIO:
+            return False
+        return re.fullmatch(r'[a-zA-Z\s]+', simplified) is not None
+    def has_hallucination(self, original, simplified):
+        """Return True when ``simplified`` appears to add content not in ``original``.
+
+        Flags the candidate when it has more than ``MAX_NEW_WORDS`` words that do
+        not occur in the original, or contains one of ``HALLUCINATION_PATTERNS``.
+        """
+        # Compare bare words so punctuation stuck to a word in the original
+        # ("fell,") does not make the same word look new in the candidate.
+        original_words = set(re.findall(r"\w+", original.lower()))
+        simplified_lower = simplified.lower()
+        simplified_words = set(re.findall(r"\w+", simplified_lower))
+        if len(simplified_words - original_words) > self.MAX_NEW_WORDS:
+            return True
+        return any(pattern in simplified_lower for pattern in self.HALLUCINATION_PATTERNS)
+# Shared simplifier instance, built once at import time; its constructor loads
+# the tokenizer and model from ./model, so importing this module performs that load.
+simplifier = RobustSimplifier()
+# Request body for POST /simplify.
+class SimplifyRequest(BaseModel):
+    # Caption text to simplify.
+    text: str
+    # Language tag; defaults to "en" and is echoed back in the response.
+    # The handlers in this file never pass it to the simplifier.
+    language: Optional[str] = "en"
+# Response body for POST /simplify.
+class SimplifyResponse(BaseModel):
+    # The text exactly as received in the request.
+    original: str
+    # Output of the simplifier for that text.
+    simplified: str
+    # Language value taken from the request.
+    language: str
+@app.get("/")
+def read_root():
+    # Health probe: returns a fixed status payload and does not touch the model.
+    payload = dict(
+        status="healthy",
+        message="Caption Simplifier API is running",
+    )
+    return payload
+@app.post("/simplify", response_model=SimplifyResponse)
+def simplify_text(request: SimplifyRequest):
+    """Simplify ``request.text`` and return it alongside the original text.
+
+    The request's ``language`` is echoed back; it is not passed to the simplifier.
+    """
+    simplified = simplifier.simplify(request.text)
+    # The request allows an explicit null language, but the response field is a
+    # required str; fall back to the request default so a null does not fail
+    # response validation.
+    language = request.language if request.language is not None else "en"
+    return SimplifyResponse(
+        original=request.text,
+        simplified=simplified,
+        language=language
+    )
+@app.get("/test")
+def test_api():
+    # Smoke test: runs one fixed sentence through the shared simplifier and
+    # reports the input, the output and a constant "success" status.
+    sample = "The government has launched a new scheme for improving education quality."
+    report = {"test_input": sample}
+    report["test_output"] = simplifier.simplify(sample)
+    report["status"] = "success"
+    return report