|
|
|
|
|
"""backend.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ELiw5PgFIpVzc3HtrCiyILA9HbrL-WXg
"""
|
|
|
|
|
import fitz |
|
|
import os |
|
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
# Hugging Face Inference API client used for all text generation below.
# The token is read from the environment; if HUGGINGFACEHUB_API_TOKEN is
# unset, os.getenv returns None and the client runs unauthenticated —
# presumably rate-limited; verify against deployment config.
_hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

client = InferenceClient(
    model="google/flan-t5-large",
    token=_hf_token,
)
|
|
|
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: A binary file-like object (e.g. an upload handle) whose
            ``read()`` returns the raw PDF bytes.

    Returns:
        str: The text of all pages concatenated in page order.
    """
    # Open from the in-memory byte stream (no temp file). The context
    # manager closes the document handle — the original version leaked it
    # by never calling doc.close().
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        # str.join instead of repeated `text +=` avoids quadratic
        # string concatenation on large documents.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
|
|
|
def generate_answer(user_question, pdf_context):
    """Answer a user question grounded in text extracted from a PDF.

    Args:
        user_question: The question posed by the user.
        pdf_context: Text pulled from the PDF, used as grounding context.

    Returns:
        The generated answer as returned by the Inference API client.
    """
    # Assemble the same prompt as before, piece by piece: a short system
    # instruction followed by the context, the question, and an "Answer:"
    # cue for the model to complete.
    prompt = (
        "\n"
        "You are a helpful assistant. Use the context from the PDF to"
        " answer the question clearly and accurately.\n"
        "\n"
        f"Context: {pdf_context}\n"
        "\n"
        f"Question: {user_question}\n"
        "\n"
        "Answer:\n"
    )

    # Moderate temperature / nucleus sampling with a light repetition
    # penalty; cap the completion at 300 new tokens.
    generation_kwargs = {
        "max_new_tokens": 300,
        "temperature": 0.5,
        "top_p": 0.9,
        "repetition_penalty": 1.05,
    }
    return client.text_generation(prompt=prompt, **generation_kwargs)