krrishsinha commited on
Commit
ca0b46f
·
verified ·
1 Parent(s): 1941458

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -19
app.py CHANGED
@@ -5,6 +5,7 @@ from pydantic import Field
5
  from fastapi.responses import JSONResponse
6
  import numpy as np
7
  from transformers import pipeline
 
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  from transformers import AutoConfig
@@ -45,14 +46,11 @@ def summarizer():
45
  return o
46
 
47
  def anq():
48
-
49
- qnap = "krrishsinha/nlpques-ans"
50
-
51
- tokenizer = AutoTokenizer.from_pretrained(qnap, use_fast=False)
52
- model = AutoModelForQuestionAnswering.from_pretrained(qnap)
53
- k = pipeline("question-answering", model=model, tokenizer=tokenizer,max_answer_len=5)
54
-
55
- return k
56
 
57
  def clause(sen):
58
 
@@ -159,23 +157,42 @@ def summary():
159
 
160
  @app.post("/qna")
161
 
162
- def quesans(py : qna):
163
-
164
  txt2 = pdf_cache["text"]
165
-
166
  if not txt2:
167
-
168
- raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")
 
 
 
 
 
 
169
 
170
- g = anq()
 
 
 
 
 
171
 
172
- forced_question = py.question + " (Give answer in 1 to 3 words only.)"
173
 
174
- result = g (question= forced_question, context= txt2)
 
 
 
 
 
175
 
176
- cleaned = clean_short(ans = result["answer"])
177
-
178
- return {"answer" : cleaned}
 
 
 
179
 
180
 
181
  @app.post("/clausedetection")
 
5
  from fastapi.responses import JSONResponse
6
  import numpy as np
7
  from transformers import pipeline
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
  import torch
10
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
  from transformers import AutoConfig
 
46
  return o
47
 
48
  def anq():
49
+ model_name = "google/flan-t5-large"
50
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
51
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
52
+ return (tokenizer, model)
53
+
 
 
 
54
 
55
  def clause(sen):
56
 
 
157
 
158
  @app.post("/qna")
159
 
160
def quesans(py: qna):
    """POST /qna — answer a question about the previously uploaded PDF.

    Builds a FLAN-T5 prompt from the cached PDF text, generates a short
    (1-3 word) answer with beam search, and returns it as JSON.

    Args:
        py: request body with a ``question`` field (project ``qna`` model).

    Returns:
        dict: ``{"answer": <cleaned short answer string>}``.

    Raises:
        HTTPException: 400 if no PDF text has been cached yet.
    """
    txt2 = pdf_cache["text"]

    # Guard clause: the endpoint is meaningless without an uploaded PDF.
    if not txt2:
        raise HTTPException(
            status_code=400,
            detail="No PDF text found. Upload PDF first."
        )

    tokenizer, model = anq()

    # FLAN-T5 instruction-style prompt; the "1 to 3 words" constraint
    # keeps answers extractive and short.
    prompt = (
        f"Provide the answer in only 1 to 3 words.\n"
        f"Question: {py.question}\n"
        f"Context: {txt2}\n"
        f"Answer:"
    )

    # truncation=True caps the input at the tokenizer's model_max_length;
    # without it an arbitrarily long PDF overflows the model's input
    # budget and memory.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    # Inference only — disable autograd to avoid building a graph and
    # holding activations in memory.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=20,
            num_beams=5,
            early_stopping=True
        )

    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Final small cleanup (project helper; trims the answer to the
    # expected short form).
    answer = clean_short(answer)

    return {"answer": answer}
196
 
197
 
198
  @app.post("/clausedetection")