Sakil committed on
Commit
be5e1bb
·
verified ·
1 Parent(s): 4e0e3fd

created app file

Browse files
Files changed (1) hide show
  1. app.py +298 -0
app.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """final_app
3
+ Automatically generated by Colab.
4
+ Original file is located at
5
+ https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux
6
+ """
7
+
8
+ # !pip install gradio langchain langchain-community langchain-huggingface langchain-groq faiss-cpu sentence-transformers pypdf
9
+
10
+
11
+
12
+ import gradio as gr
13
+ import os
14
+ import tempfile
15
+ from langchain_community.document_loaders import PyPDFLoader
16
+ from langchain_community.vectorstores import FAISS
17
+ from langchain_huggingface import HuggingFaceEmbeddings
18
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
+ from langchain_groq import ChatGroq
20
+ from langchain.chains import RetrievalQA
21
+ from langchain.prompts import PromptTemplate
22
# Groq API Key
# SECURITY FIX: the key was previously hard-coded in source (and therefore
# committed to version control). Read it from the environment instead; the
# old value should be revoked. Set GROQ_API_KEY in the Space/host secrets.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if GROQ_API_KEY:
    # Keep the env var populated for libraries that read it directly.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Global variables to store vectorstore and processed files
vectorstore = None        # FAISS index built by process_pdfs(); None until documents are processed
processed_files_list = []  # basenames of the PDFs folded into the current vectorstore
31
def _get_embeddings():
    """Return the sentence-transformer embedding model, creating it once.

    Loading the HuggingFace model is expensive, so the instance is memoised
    on the function object and reused across processing calls.
    """
    if getattr(_get_embeddings, "_cached", None) is None:
        _get_embeddings._cached = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'}
        )
    return _get_embeddings._cached

def process_pdfs(files):
    """Process uploaded PDF files and create vector store.

    Args:
        files: list of Gradio file objects (each exposes a ``.name`` path),
            or None/empty when nothing was uploaded.

    Returns:
        Tuple of (processing-status message, chat-status message) strings
        for the two UI textboxes. On failure the second element is "".
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        all_documents = []
        processed_names = []

        # Load every uploaded PDF into LangChain documents.
        for file in files:
            loader = PyPDFLoader(file.name)
            documents = loader.load()
            all_documents.extend(documents)
            processed_names.append(os.path.basename(file.name))

        if not all_documents:
            return "❌ No content extracted from PDFs", ""

        # Split documents into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        splits = text_splitter.split_documents(all_documents)

        # Build the FAISS index. PERF: the embedding model is cached by
        # _get_embeddings() instead of being re-loaded on every call.
        vectorstore = FAISS.from_documents(splits, _get_embeddings())
        processed_files_list = processed_names

        success_msg = f"βœ… Successfully processed {len(files)} document(s)!\n"
        success_msg += f"πŸ“Š Created {len(splits)} text chunks for retrieval\n\n"
        success_msg += "πŸ“„ Processed files:\n" + "\n".join([f" β€’ {name}" for name in processed_names])

        return success_msg, "βœ… Documents processed! You can now ask questions."

    except Exception as e:
        # Surface the failure in the status box instead of crashing the UI.
        return f"❌ Error processing documents: {str(e)}", ""
79
+
80
def answer_question(question, chat_history):
    """Answer a question from the processed documents via retrieval-QA.

    Args:
        question: user question string from the textbox.
        chat_history: list of [question, answer] pairs shown in the Chatbot.

    Returns:
        A new chat-history list with the [question, answer] pair appended
        (errors and guard messages are also delivered as chat answers).
    """
    global vectorstore

    # Guard: retrieval needs an index built by process_pdfs() first.
    if not vectorstore:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]

    if not question or question.strip() == "":
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        # Initialize LLM with stricter temperature for factual answers
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,  # Set to 0 for most deterministic, factual responses
            max_tokens=1024,
            api_key=GROQ_API_KEY
        )

        # Create custom prompt with strict context-only answering
        prompt_template = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.
CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know
Context from uploaded documents:
{context}
Question: {question}
Answer (only from the context above):"""

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        # Create retrieval chain with enhanced retrieval settings
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,        # Retrieve top 5 most relevant chunks
                    "fetch_k": 20  # Fetch more candidates before filtering
                }
            ),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

        # Get response
        result = qa_chain({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Append source citations unless the model declined to answer.
        # FIX: dedupe with an ordered list instead of a set so the cited
        # sources keep retrieval-relevance order (set iteration order is
        # arbitrary and made the output nondeterministic).
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\nπŸ“Œ **Sources found in documents:**"
            unique_sources = []
            for doc in source_docs[:3]:  # Show top 3 sources
                source = doc.metadata.get('source', 'Unknown')
                page = doc.metadata.get('page', 'Unknown')
                source_id = f"{source} (Page {page})"
                if source_id not in unique_sources:
                    unique_sources.append(source_id)

            for source in unique_sources:
                answer += f"\n β€’ {source}"

        # Update chat history
        return chat_history + [[question, answer]]

    except Exception as e:
        # Keep the UI responsive: report failures inside the chat itself.
        error_msg = f"❌ Error generating answer: {str(e)}"
        return chat_history + [[question, error_msg]]
159
+
160
+ def clear_data():
161
+ """Clear all processed data"""
162
+ global vectorstore, processed_files_list
163
+ vectorstore = None
164
+ processed_files_list = []
165
+ return "πŸ—‘οΈ All data cleared. Please upload new documents.", "", []
166
+
167
# Custom CSS for better styling.
# Injected via gr.Blocks(css=custom_css); the #title / #subtitle selectors
# match the id attributes written in the gr.HTML header elements below.
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
#subtitle {
    text-align: center;
    color: #666;
    font-size: 1.2em;
    margin-bottom: 20px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""
189
+
190
# Create Gradio interface.
# Two-column layout: upload/processing controls on the left, chat on the right.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Header
    gr.HTML("<h1 id='title'>πŸ“š Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")

    with gr.Row():
        # Left column - Document Upload
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“„ Document Upload")
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("πŸ”„ Process Documents", variant="primary", size="lg")
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("πŸ—‘οΈ Clear All Data", variant="stop")

            gr.Markdown("""
---
### ℹ️ How to Use
1. **Upload PDFs** using the file uploader
2. Click **Process Documents**
3. **Ask questions** in the chat
4. Get **AI-powered answers**
**Features:**
- πŸ“„ Multiple PDF support
- πŸ€– Powered by Groq LLM
- πŸ” Semantic search
- πŸ’Ύ Chat history
""")

        # Right column - Chat Interface
        with gr.Column(scale=2):
            gr.Markdown("### πŸ’¬ Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="⚠️ Upload and process documents to start",
                interactive=False
            )
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("πŸš€ Ask", variant="primary", scale=1)

            clear_chat_btn = gr.Button("🧹 Clear Chat")

    # Footer
    gr.HTML("""
    <div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
        <p>Powered by Langchain, Groq, and HuggingFace | Built with ❀️ using Gradio</p>
    </div>
    """)

    # Event handlers
    # Process uploaded PDFs; report into the status box and the chat status.
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )

    # Answer on button click, then clear the question box for the next turn.
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Same flow when the user presses Enter inside the question textbox.
    question_input.submit(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Empty only the visible chat history (keeps the vectorstore intact).
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

    # Drop the vectorstore, processed-file list, and chat via clear_data().
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
291
+
292
# Launch the app
if __name__ == "__main__":
    demo.launch(
        share=True,              # NOTE(review): share links look unnecessary when self-hosting on 0.0.0.0 — confirm intent
        server_name="0.0.0.0",   # listen on all interfaces (container/Space friendly)
        server_port=7860         # standard Gradio port
    )