| import os |
| import gradio as gr |
|
|
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_text_splitters import RecursiveCharacterTextSplitter |
| |
| from langchain_chroma import Chroma |
|
|
| from langchain_huggingface import ( |
| HuggingFaceEmbeddings, |
| HuggingFaceEndpoint |
| ) |
|
|
| |
| |
| |
|
|
# Hugging Face API token taken from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Embedding model used to vectorise PDF chunks: runs on CPU and returns
# normalised vectors.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

# Text-generation LLM reached through HuggingFaceEndpoint; it writes the answers.
llm = HuggingFaceEndpoint(
    repo_id="NousResearch/Llama-2-7b-chat-hf",
    huggingfacehub_api_token=HF_TOKEN,
    task="text-generation",
    max_new_tokens=512,
    temperature=0.5,
)

# Vector store of the most recently indexed PDF; stays None until process_pdf
# has indexed a document.
db = None
|
|
| |
| |
| |
def process_pdf(pdf_file):
    """Load an uploaded PDF, split it into chunks and index them in Chroma.

    On success the new vector store is kept in the module-level ``db`` so
    that later questions can be answered from it.

    Args:
        pdf_file: The upload handed over by the Gradio file component. Either
            an object exposing the file path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.

    Returns:
        A human-readable status message. Failures are reported in the message
        instead of being raised, so the UI always has something to display.
    """
    global db

    if pdf_file is None:
        return "Please upload a PDF file."

    # Accept both upload shapes: a wrapper carrying the path in ``.name`` and
    # a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        pages = PyPDFLoader(pdf_path).load()

        # Overlapping chunks keep sentences that straddle a boundary retrievable.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = splitter.split_documents(pages)

        # E.g. a scanned PDF without a text layer: report it clearly rather
        # than letting the vector store fail on an empty batch. The previously
        # indexed PDF (if any) is left untouched in this case.
        if not chunks:
            return "No extractable text was found in the PDF."

        # Every call to Chroma.from_documents here targets the same
        # default-named collection, so the previous store is dropped first to
        # keep chunks of an earlier PDF out of the new index. ``db`` is reset
        # before deleting so a failure below never leaves it pointing at a
        # deleted collection.
        if db is not None:
            stale_db, db = db, None
            stale_db.delete_collection()

        db = Chroma.from_documents(
            documents=chunks,
            embedding=embedding_model,
        )
        return "PDF processed and indexed in ChromaDB successfully!"
    except Exception as e:
        # UI boundary: surface the problem as text instead of crashing the app.
        return f"Error processing PDF: {str(e)}"
|
|
| |
| |
| |
def ask_question(question):
    """Answer a question from the currently indexed PDF.

    The three chunks most similar to the question are retrieved from the
    module-level ``db`` and passed to ``llm`` as context inside a prompt that
    tells the model to answer only from that context.

    Args:
        question: The user's question. ``None``, an empty string and a
            whitespace-only string are all treated as "no question".

    Returns:
        The model's response, or a human-readable message when no PDF has been
        indexed, no question was given, or retrieval/generation failed.
    """
    if db is None:
        return "Please upload and process a PDF first."

    # Guard against None as well as empty/whitespace-only input; calling
    # .strip() on None would raise before any error handling applies.
    if not question or not question.strip():
        return "Please enter a question."

    try:
        matches = db.similarity_search(question, k=3)
        context = "\n\n".join(match.page_content for match in matches)

        prompt = (
            "You are a helpful PDF question answering assistant.\n"
            "\n"
            "Answer the question ONLY from the provided context.\n"
            "\n"
            "If the answer is not in the context, say:\n"
            '"I could not find the answer in the PDF."\n'
            "\n"
            "Context:\n"
            f"{context}\n"
            "\n"
            "Question:\n"
            f"{question}\n"
            "\n"
            "Answer:"
        )

        return llm.invoke(prompt)
    except Exception as e:
        # UI boundary: surface the problem as text instead of crashing the app.
        return f"Error generating answer: {str(e)}"
|
|
| |
| |
| |
|
|
# Assemble the Gradio interface: upload/indexing controls first, then the
# question/answer controls. Components render in the order they are created.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answering Bot (Powered by ChromaDB)")

    # --- PDF upload and indexing ---
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    process_btn = gr.Button("Process PDF")
    process_output = gr.Textbox(label="PDF Status")
    process_btn.click(process_pdf, inputs=pdf_input, outputs=process_output)

    # --- Question answering ---
    question_input = gr.Textbox(label="Ask a Question")
    ask_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer", lines=10)
    ask_btn.click(ask_question, inputs=question_input, outputs=answer_output)


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()