Spaces:

Syamchand
/

red_ml-models

Sleeping

Syamchand committed on 13 days ago

Commit

8fcdf14

verified ·

1 Parent(s): a7a5f4c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -352,28 +352,28 @@ def semantic_chunking(req: ChunkRequest):
 @app.post("/predict/explain")
 def explain_text(req: ExplanationRequest):
-    """
-    Summarize or explain the input text.
-    mode = "summarize"  → short summary
-    mode = "explain"    → detailed plain‑English explanation
-    """
     tokenizer = models["explain_tokenizer"]
     model = models["explain_model"]
-    if req.mode == "summarize":
-        instruction = f"Summarize the following contract clause in 1-2 sentences:\n\n{req.text}"
-    else:
-        instruction = f"Explain the following contract clause in plain English, in detail:\n\n{req.text}"
-    inputs = tokenizer(instruction, return_tensors="pt", truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=150,
-            do_sample=False,
-            num_beams=4,
             early_stopping=True
         )
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -382,7 +382,6 @@ def explain_text(req: ExplanationRequest):
 @app.post("/predict/ner", response_model=NERResult)
 def predict_ner(req: NERRequest):
     # Default entity types suitable for freelancer contracts

 @app.post("/predict/explain")
 def explain_text(req: ExplanationRequest):
     tokenizer = models["explain_tokenizer"]
     model = models["explain_model"]
+    # FLAN-T5 models fine-tuned on summarization require the "summarize: " prefix
+    input_text = f"summarize: {req.text}"
+    # If the user asks for an 'explain', we can still frame it as an intensive summary
+    if req.mode == "explain":
+        input_text = f"summarize in detail: {req.text}"
+    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=150,
+            num_beams=5,
+            length_penalty=2.0,      # Encourage longer generation
+            no_repeat_ngram_size=3,  # Prevent repetition
             early_stopping=True
         )
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 @app.post("/predict/ner", response_model=NERResult)
 def predict_ner(req: NERRequest):
     # Default entity types suitable for freelancer contracts