Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_groq import ChatGroq | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_community.document_loaders import PyPDFLoader | |
| import tempfile | |
| import shutil | |
# Model identifier handed to the chat LLM when a PDF is processed.
MODEL_NAME = "llama-3.3-70b-versatile"
# API key read from the environment; empty string when the variable is unset.
DEFAULT_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Module-level state shared by the handlers defined in this file.
vectorstore, conversation_chain = None, None
chat_history = []
def process_pdf(pdf_file, api_key):
    """Index an uploaded PDF and build the retrieval chat chain for it.

    Args:
        pdf_file: The upload as delivered by the Gradio ``File`` component.
            Either a filesystem path (what ``type="filepath"`` produces) or a
            file-like object that exposes its path as ``.name``.
        api_key: Groq API key used to construct the chat model.

    Returns:
        A ``(status_message, chatbot_value)`` tuple. ``chatbot_value`` is an
        empty list on success and ``None`` when validation or processing
        fails.
    """
    global vectorstore, conversation_chain, chat_history

    if not api_key:
        return "Please provide a Groq API key first.", None
    if pdf_file is None:
        return "Please upload a PDF file.", None

    try:
        # A ``filepath`` upload arrives as a plain string, which has no
        # ``.name``; only fall back to the attribute for file-like objects.
        if isinstance(pdf_file, (str, os.PathLike)):
            source_path = pdf_file
        else:
            source_path = pdf_file.name

        # The context manager removes the working copy even when loading
        # raises, so failed uploads do not leave directories behind.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
            shutil.copy(source_path, temp_pdf_path)
            documents = PyPDFLoader(temp_pdf_path).load()

        # Split documents into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_documents(documents)
        if not chunks:
            # Nothing to index (e.g. a scanned PDF without a text layer);
            # report it explicitly instead of failing inside the vector store.
            return (
                "Error processing PDF: no extractable text was found in the document.",
                None,
            )

        # Create embeddings and vector store.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        new_vectorstore = FAISS.from_documents(chunks, embeddings)

        # Initialize LLM.
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name=MODEL_NAME,
            temperature=0.7,
            max_tokens=1024,
        )

        # Create conversation chain; ``output_key`` tells the memory which of
        # the chain's outputs to record, since source documents are returned
        # alongside the answer.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        new_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=new_vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True,
        )

        # Publish the new state only once every step has succeeded, so a
        # failure part-way through leaves the previous document usable.
        vectorstore = new_vectorstore
        conversation_chain = new_chain
        chat_history = []

        return (
            f"✅ PDF processed successfully! Found {len(chunks)} text chunks. "
            "You can now ask questions about the document.",
            [],
        )
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # letting the handler crash.
        return f"Error processing PDF: {str(e)}", None
def chat_with_pdf(message, chat_history_ui, api_key):
    """Answer a user message against the currently loaded PDF.

    Args:
        message: Text typed by the user.
        chat_history_ui: Current chatbot history as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None`` when the
            chatbot holds no value.
        api_key: Supplied by the event wiring; not read by this handler
            (the key is bound to the chain when the PDF is processed).

    Returns:
        A ``(history, textbox_value)`` tuple: the updated history and an
        empty string that clears the message box.
    """
    global conversation_chain, chat_history

    # process_pdf hands the chatbot ``None`` on failure, so the history can
    # arrive as None; normalize before any ``append``.
    if chat_history_ui is None:
        chat_history_ui = []

    if not message or not message.strip():
        return chat_history_ui, ""

    chat_history_ui.append({
        "role": "user",
        "content": message
    })

    if conversation_chain is None:
        chat_history_ui.append({
            "role": "assistant",
            "content": "Please upload a PDF file first before asking questions."
        })
        return chat_history_ui, ""

    try:
        # Get response from RAG chain. ``invoke`` is the supported entry
        # point; calling the chain object directly is deprecated.
        response = conversation_chain.invoke({"question": message})
        reply = response["answer"]
    except Exception as e:
        # UI boundary: show the failure in the conversation.
        reply = f"Error: {str(e)}"

    chat_history_ui.append({
        "role": "assistant",
        "content": reply
    })
    return chat_history_ui, ""
def reset_chat():
    """Discard the loaded document state and clear the conversation.

    Returns:
        A ``(chatbot_value, status_message)`` tuple: an empty history and
        the status line shown after a reset.
    """
    global conversation_chain, vectorstore, chat_history

    # Forget the retrieval chain and its index together.
    vectorstore = conversation_chain = None
    chat_history = []

    cleared_history = []
    status = "Ready to upload a new PDF."
    return cleared_history, status
# Build Gradio Interface
with gr.Blocks(title="PDF RAG Chatbot") as demo:
    gr.Markdown("# 📄 PDF RAG Chatbot")
    gr.Markdown("Upload a PDF and chat with its content using AI")
    gr.Markdown(f"**Model:** `{MODEL_NAME}`")

    with gr.Row():
        # Left column: credentials, upload and status controls.
        with gr.Column(scale=1):
            if DEFAULT_API_KEY:
                # Keep the server-provided key in server-side state. A hidden
                # Textbox would carry the secret as a component value in the
                # page sent to every visitor.
                api_key_input = gr.State(DEFAULT_API_KEY)
            else:
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key here...",
                    type="password"
                )
            pdf_upload = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                type="filepath"
            )
            process_btn = gr.Button("Process PDF", variant="primary")
            status_text = gr.Textbox(
                label="Status",
                value="Upload a PDF to get started.",
                interactive=False,
                lines=3,
                max_lines=5
            )
            clear_btn = gr.Button("Reset Chat", variant="stop")

        # Right column: conversation view and message entry.
        with gr.Column(scale=2):
            # NOTE(review): the handlers emit {"role", "content"} dicts; on
            # Gradio versions where Chatbot still defaults to the tuple
            # format this needs type="messages" -- confirm against the
            # pinned Gradio version.
            chatbot = gr.Chatbot(height=500)
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask a question about the PDF...",
                    scale=4
                )
                submit_btn = gr.Button("Send", scale=1)

    # Setup instructions are only relevant when visitors must bring a key.
    if not DEFAULT_API_KEY:
        gr.Markdown("### Instructions:")
        gr.Markdown("1. Get a free API key from [Groq Console](https://console.groq.com)")
        gr.Markdown("2. Enter your API key above")
        gr.Markdown("3. Upload a PDF file")
        gr.Markdown("4. Ask questions about the content!")

    # Event handlers
    process_btn.click(
        process_pdf,
        inputs=[pdf_upload, api_key_input],
        outputs=[status_text, chatbot]
    )
    # Pressing Enter in the message box and clicking Send do the same thing.
    msg.submit(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    submit_btn.click(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    clear_btn.click(
        reset_chat,
        outputs=[chatbot, status_text]
    )

if __name__ == "__main__":
    demo.launch()