Spaces:
Runtime error
Runtime error
File size: 2,530 Bytes
2395611 1103643 2395611 1103643 2395611 1103643 2395611 1103643 2395611 1103643 2395611 1103643 d61809f 2395611 91a07db 2395611 1103643 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import InferenceClient
# Model identifiers are kept in one place so they are easy to swap.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

# Sentence-embedding model used to vectorise PDF chunks; pinned to CPU.
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={"device": "cpu"},
)

# Hosted inference client used to generate the answers.
client = InferenceClient(model=LLM_MODEL_NAME)

# FAISS index over the most recently processed PDF. It stays None until
# process_pdf() has indexed a document successfully.
vectorstore = None
def process_pdf(pdf_file):
    """Index an uploaded PDF so that questions can be answered against it.

    The PDF is loaded page by page, split into overlapping text chunks,
    embedded with ``embedding_model`` and stored as a FAISS index in the
    module-level ``vectorstore`` (replacing any previous index).

    Args:
        pdf_file: Value delivered by the file-upload input: ``None`` when
            nothing was uploaded, otherwise either a path string or an
            object whose ``name`` attribute holds the uploaded file's path.

    Returns:
        A status message for the UI. Failures are reported in the returned
        message instead of being raised; on failure the previous
        ``vectorstore`` is left untouched.
    """
    global vectorstore
    if pdf_file is None:
        return "Please upload a PDF file."

    # Accept both a plain path string and a file wrapper exposing ``.name``.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # should be shown in the status box rather than crash the callback.
    try:
        documents = PyPDFLoader(pdf_path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(documents)
        if not chunks:
            # Scanned / image-only PDFs yield no text. Say so explicitly
            # instead of attempting to index an empty chunk list.
            return "❌ Error: No extractable text found in this PDF."
        vectorstore = FAISS.from_documents(
            documents=chunks,
            embedding=embedding_model,
        )
        return f"✅ Processed {len(documents)} pages into {len(chunks)} chunks."
    except Exception as e:
        return f"❌ Error: {str(e)}"
def _page_label(metadata):
    """Return a human-readable page label for a chunk's metadata dict.

    Integer ``page`` values are treated as zero-based indices (the
    PyPDFLoader convention) and shifted to 1-based page numbers. A missing
    value is rendered as ``"N/A"``; any other value is shown unchanged.
    """
    page = metadata.get("page", "N/A")
    if isinstance(page, int):
        return str(page + 1)
    return str(page)


def answer_question(question):
    """Answer a question using the chunks of the currently indexed PDF.

    Retrieves the three chunks most similar to the question from the
    module-level ``vectorstore``, places them in a Zephyr-style chat prompt
    and asks the hosted model (``client``) for a completion.

    Args:
        question: The user's question text; ``None`` or whitespace-only
            input is rejected with a prompt to enter a question.

    Returns:
        A ``(answer, sources)`` tuple of strings. ``sources`` lists the page
        of each retrieved chunk, one per line, and is empty whenever no
        answer could be produced. Failures are reported in ``answer``
        instead of being raised.
    """
    global vectorstore
    if vectorstore is None:
        return "Upload a PDF first.", ""
    # Guard against None as well as blank input before calling .strip().
    if not question or not question.strip():
        return "Enter a question.", ""

    # Broad catch is deliberate: this is the UI boundary, and retrieval or
    # inference errors should be displayed rather than crash the callback.
    try:
        docs = vectorstore.similarity_search(question, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)
        prompt = f"<|system|>\nAnswer based on context only.\n</s>\n<|user|>\nContext:\n{context}\n\nQuestion: {question}\n</s>\n<|assistant|>\n"
        response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
        sources = [
            f"{i}. Page {_page_label(doc.metadata)}"
            for i, doc in enumerate(docs, 1)
        ]
        return response, "\n".join(sources)
    except Exception as e:
        return f"Error: {str(e)}", ""
# Two-column UI: the left column uploads and indexes a PDF, the right column
# asks questions against the indexed document.
with gr.Blocks() as demo:
    gr.Markdown("# 📚 RAG Document Q&A")
    with gr.Row():
        with gr.Column():
            pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
            btn1 = gr.Button("Process PDF")
            status = gr.Textbox(label="Status")
        with gr.Column():
            question = gr.Textbox(label="Question")
            btn2 = gr.Button("Ask")
            answer = gr.Textbox(label="Answer", lines=5)
            sources = gr.Textbox(label="Sources")

    # Event listeners are registered inside the Blocks context.
    btn1.click(process_pdf, pdf, status)
    btn2.click(answer_question, question, [answer, sources])

# Listen on all interfaces at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)
|