# (removed stray notebook-export artifacts: "Spaces:", "Runtime error")
| # -*- coding: utf-8 -*- | |
| """final_app | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux | |
| """ | |
| # !pip install gradio langchain langchain-community langchain-huggingface langchain-groq faiss-cpu sentence-transformers pypdf | |
| import gradio as gr | |
| import os | |
| import tempfile | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_groq import ChatGroq | |
| from langchain.chains import RetrievalQA | |
| from langchain.prompts import PromptTemplate | |
# Groq API key — read from the environment rather than hard-coding it.
# SECURITY NOTE(review): the original file embedded a live API key in
# source; that key should be rotated and supplied via the GROQ_API_KEY
# environment variable instead.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Module-level state shared by the Gradio callbacks below:
#   vectorstore          - FAISS index built from the uploaded PDFs (None until processed)
#   processed_files_list - basenames of the files the index was built from
vectorstore = None
processed_files_list = []
def process_pdfs(files):
    """Load the uploaded PDFs, chunk them, and build the FAISS vector store.

    Args:
        files: list of Gradio file objects (each exposes a ``.name`` path),
            or a falsy value when nothing was uploaded.

    Returns:
        tuple[str, str]: (processing-status message, chat-status message).

    Side effects:
        Replaces the module-level ``vectorstore`` and ``processed_files_list``.
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        all_documents = []
        processed_names = []

        # Load every page of every uploaded PDF.
        for file in files:
            loader = PyPDFLoader(file.name)
            all_documents.extend(loader.load())
            processed_names.append(os.path.basename(file.name))

        if not all_documents:
            return "❌ No content extracted from PDFs", ""

        # Split into overlapping chunks sized for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        splits = text_splitter.split_documents(all_documents)

        # Embed chunks locally on CPU and index them with FAISS.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'}
        )
        vectorstore = FAISS.from_documents(splits, embeddings)
        processed_files_list = processed_names

        # NOTE(review): emoji in these messages were mojibake in the original
        # export; restored to the most plausible glyphs.
        success_msg = f"✅ Successfully processed {len(files)} document(s)!\n"
        success_msg += f"📄 Created {len(splits)} text chunks for retrieval\n\n"
        success_msg += "📋 Processed files:\n" + "\n".join(
            f" • {name}" for name in processed_names
        )
        return success_msg, "✅ Documents processed! You can now ask questions."

    except Exception as e:
        # Surface loader/embedding failures to the UI instead of crashing.
        return f"❌ Error processing documents: {str(e)}", ""
def answer_question(question, chat_history):
    """Answer *question* strictly from the indexed documents.

    Args:
        question: the user's question text.
        chat_history: list of [user, assistant] pairs (Gradio Chatbot format).

    Returns:
        The updated chat history; failures are appended as assistant
        messages so the UI never raises.
    """
    global vectorstore

    # Guard clauses: an index must exist and the question must be non-blank.
    if not vectorstore:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]
    if not question or not question.strip():
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        # Temperature 0 for the most deterministic, factual responses.
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=1024,
            api_key=GROQ_API_KEY
        )

        # Strict context-only prompt: the model must say it doesn't know
        # rather than fall back on its own knowledge.
        prompt_template = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.
CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know
Context from uploaded documents:
{context}
Question: {question}
Answer (only from the context above):"""
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        # "stuff" chain: concatenate the retrieved chunks into one prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,         # chunks handed to the LLM
                    "fetch_k": 20   # candidates considered before selection
                }
            ),
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=True
        )

        result = qa_chain({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Cite sources unless the model answered "don't know".
        # Deduplicate while preserving retrieval order (iterating a bare
        # set, as the original did, yields an arbitrary citation order).
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\n📚 **Sources found in documents:**"
            seen = set()
            for doc in source_docs[:3]:  # cite at most the top 3 chunks
                source = doc.metadata.get('source', 'Unknown')
                page = doc.metadata.get('page', 'Unknown')
                source_id = f"{source} (Page {page})"
                if source_id not in seen:
                    seen.add(source_id)
                    answer += f"\n • {source_id}"

        return chat_history + [[question, answer]]

    except Exception as e:
        # Report errors inline so the chat UI stays responsive.
        return chat_history + [[question, f"❌ Error generating answer: {str(e)}"]]
def clear_data():
    """Reset all module-level state (vector index + processed-file list).

    Returns:
        tuple: (processing-status message, chat-status message, empty chat
        history) — wired to the three corresponding UI components.
    """
    global vectorstore, processed_files_list
    vectorstore = None
    processed_files_list = []
    return "🗑️ All data cleared. Please upload new documents.", "", []
# Custom CSS for better styling:
#   #title            - gradient-filled heading text (webkit background-clip trick)
#   #subtitle         - muted, centered tagline
#   .gradio-container - cap app width at 1200px and center it
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
#subtitle {
    text-align: center;
    color: #666;
    font-size: 1.2em;
    margin-bottom: 20px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""
# Create Gradio interface.
# NOTE(review): emoji in several labels/strings below appear mojibake'd in
# this copy of the file (encoding issue in the export); left byte-identical
# here — verify against the original before fixing.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Header
    gr.HTML("<h1 id='title'>π Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")
    with gr.Row():
        # Left column - Document Upload
        with gr.Column(scale=1):
            gr.Markdown("### π Document Upload")
            # Multi-file PDF uploader feeding process_pdfs()
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("π Process Documents", variant="primary", size="lg")
            # Read-only status area filled by process_pdfs()
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("ποΈ Clear All Data", variant="stop")
            # Static usage instructions
            gr.Markdown("""
---
### βΉοΈ How to Use
1. **Upload PDFs** using the file uploader
2. Click **Process Documents**
3. **Ask questions** in the chat
4. Get **AI-powered answers**
**Features:**
- π Multiple PDF support
- π€ Powered by Groq LLM
- π Semantic search
- πΎ Chat history
""")
        # Right column - Chat Interface
        with gr.Column(scale=2):
            gr.Markdown("### π¬ Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="β οΈ Upload and process documents to start",
                interactive=False
            )
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("π Ask", variant="primary", scale=1)
            clear_chat_btn = gr.Button("π§Ή Clear Chat")
    # Footer
    gr.HTML("""
<div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
<p>Powered by Langchain, Groq, and HuggingFace | Built with β€οΈ using Gradio</p>
</div>
""")
    # Event handlers
    # Process uploads -> processing status + chat status boxes
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )
    # Ask via button; then clear the question box
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )
    # Ask via Enter key; same pipeline as the button
    question_input.submit(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )
    # Clear only the chat pane (the index is kept)
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )
    # Reset everything: index, file list, and chat history
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
# Launch the app (script entry point; skipped on import)
if __name__ == "__main__":
    demo.launch(
        share=True,              # also expose a public *.gradio.live tunnel (Colab-friendly)
        server_name="0.0.0.0",   # bind all interfaces so it is reachable outside the host
        server_port=7860
    )