Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from pydantic import Field
|
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
import numpy as np
|
| 7 |
from transformers import pipeline
|
|
|
|
| 8 |
import torch
|
| 9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 10 |
from transformers import AutoConfig
|
|
@@ -45,14 +46,11 @@ def summarizer():
|
|
| 45 |
return o
|
| 46 |
|
| 47 |
def anq():
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
k = pipeline("question-answering", model=model, tokenizer=tokenizer,max_answer_len=5)
|
| 54 |
-
|
| 55 |
-
return k
|
| 56 |
|
| 57 |
def clause(sen):
|
| 58 |
|
|
@@ -159,23 +157,42 @@ def summary():
|
|
| 159 |
|
| 160 |
@app.post("/qna")
|
| 161 |
|
| 162 |
-
def quesans(py
|
| 163 |
-
|
| 164 |
txt2 = pdf_cache["text"]
|
| 165 |
-
|
| 166 |
if not txt2:
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
-
|
| 173 |
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
@app.post("/clausedetection")
|
|
|
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
import numpy as np
|
| 7 |
from transformers import pipeline
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 9 |
import torch
|
| 10 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 11 |
from transformers import AutoConfig
|
|
|
|
| 46 |
return o
|
| 47 |
|
| 48 |
# Process-wide cache of loaded (tokenizer, model) pairs, keyed by checkpoint
# name, so the weights are read from disk/network only once.
_ANQ_CACHE = {}


def anq():
    """Return the tokenizer and seq2seq model used for question answering.

    The checkpoint is ``google/flan-t5-large``. Loading it is expensive, and
    the ``/qna`` handler calls this function on every request, so the loaded
    pair is cached on first use and reused afterwards.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForSeq2SeqLM.from_pretrained``.
    """
    model_name = "google/flan-t5-large"
    cached = _ANQ_CACHE.get(model_name)
    if cached is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        cached = (tokenizer, model)
        _ANQ_CACHE[model_name] = cached
    return cached
|
| 53 |
+
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def clause(sen):
|
| 56 |
|
|
|
|
| 157 |
|
| 158 |
@app.post("/qna")
def quesans(py: qna):
    """Answer a question about the cached PDF text with FLAN-T5.

    Reads the text stored under ``pdf_cache["text"]``, builds an
    instruction-style prompt from it and ``py.question``, runs beam-search
    generation, and returns the decoded answer after ``clean_short``.

    Args:
        py: Request body; only its ``question`` attribute is read here.

    Returns:
        dict: ``{"answer": <str>}``.

    Raises:
        HTTPException: 400 when no PDF text is cached.
    """
    document_text = pdf_cache["text"]
    if not document_text:
        raise HTTPException(
            status_code=400,
            detail="No PDF text found. Upload PDF first.",
        )

    tokenizer, model = anq()

    # FLAN-T5 prompt: instruction, question, context, then the answer cue.
    # NOTE(review): the whole cached text goes into the prompt and the
    # tokenizer call below does not truncate it -- confirm this is acceptable
    # for long PDFs.
    prompt = "\n".join(
        (
            "Provide the answer in only 1 to 3 words.",
            f"Question: {py.question}",
            f"Context: {document_text}",
            "Answer:",
        )
    )

    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        **encoded,
        max_length=20,
        num_beams=5,
        early_stopping=True,
    )

    # Only the first returned sequence is decoded.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # Post-process through the project helper before returning.
    return {"answer": clean_short(decoded)}
|
| 196 |
|
| 197 |
|
| 198 |
@app.post("/clausedetection")
|