"""final_app

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux
"""
| |
|
| | |
| |
|
| |
|
| |
|
| | import gradio as gr |
| | import os |
| | import tempfile |
| | from langchain_community.document_loaders import PyPDFLoader |
| | from langchain_community.vectorstores import FAISS |
| | from langchain_huggingface import HuggingFaceEmbeddings |
| | from langchain_text_splitters import RecursiveCharacterTextSplitter |
| | from langchain_groq import ChatGroq |
| | from langchain.chains import RetrievalQA |
| | from langchain.prompts import PromptTemplate |
| |
|
| | |
# SECURITY: an API key was hard-coded here and committed to source control —
# it must be treated as leaked and revoked. Prefer the GROQ_API_KEY
# environment variable; the hard-coded value remains only as a
# backward-compatible fallback so existing notebooks keep working.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or "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"
# Re-export so libraries that read the environment (e.g. langchain_groq)
# see the same key the app uses.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
| |
|
| | |
# Module-level state shared across the Gradio callbacks below:
# vectorstore — FAISS index built from the uploaded PDFs (None until
#   process_pdfs succeeds; answer_question refuses to run while it is None).
# processed_files_list — base names of the PDFs that built the current index.
vectorstore = None
processed_files_list = []
| |
|
def process_pdfs(files):
    """Build a FAISS vector store from the uploaded PDF files.

    Args:
        files: list of Gradio file objects (each exposes a ``.name`` path),
            or None / empty when nothing was uploaded.

    Returns:
        A ``(status_message, chat_status)`` pair of strings feeding the
        processing-status textbox and the chat-status textbox.
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        docs = []
        names = []
        for uploaded in files:
            docs.extend(PyPDFLoader(uploaded.name).load())
            names.append(os.path.basename(uploaded.name))

        if not docs:
            return "❌ No content extracted from PDFs", ""

        # Chunk the pages so retrieval operates on passage-sized pieces.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = splitter.split_documents(docs)

        # CPU-pinned embeddings keep this runnable on machines without a GPU.
        embedder = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
        )

        # Publish the new index and file list for answer_question().
        vectorstore = FAISS.from_documents(chunks, embedder)
        processed_files_list = names

        listing = "\n".join(f"  • {name}" for name in names)
        success_msg = (
            f"✅ Successfully processed {len(files)} document(s)!\n"
            f"📄 Created {len(chunks)} text chunks for retrieval\n\n"
            "📁 Processed files:\n" + listing
        )
        return success_msg, "✅ Documents processed! You can now ask questions."

    except Exception as e:
        # Surface the failure in the UI rather than crashing the callback.
        return f"❌ Error processing documents: {str(e)}", ""
| |
|
def answer_question(question, chat_history):
    """Answer *question* strictly from the processed documents.

    Args:
        question: user's question text (may be empty/whitespace).
        chat_history: current chatbot history as a list of
            ``[question, answer]`` pairs.

    Returns:
        A new history list with the ``[question, answer]`` pair appended;
        guard failures and exceptions are reported as chat answers rather
        than raised, so the Gradio callback never crashes.
    """
    global vectorstore

    if not vectorstore:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]

    if not question or question.strip() == "":
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=1024,
            api_key=GROQ_API_KEY
        )

        # Prompt constrains the model to the retrieved context only; the
        # "don't know" phrasing is matched below to suppress source listing.
        prompt_template = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.
CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know
Context from uploaded documents:
{context}
Question: {question}
Answer (only from the context above):"""

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,
                    "fetch_k": 20
                }
            ),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

        # FIX: calling the chain directly (qa_chain({...})) uses the
        # deprecated Chain.__call__; invoke() is the supported entry point
        # and returns the same result dict.
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Append a source listing only when the model actually answered.
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\n📚 **Sources found in documents:**"
            unique_sources = set()
            for doc in source_docs[:3]:
                source = doc.metadata.get('source', 'Unknown')
                page = doc.metadata.get('page', 'Unknown')
                unique_sources.add(f"{source} (Page {page})")

            # FIX: sets iterate in arbitrary order; sort for stable display.
            for source in sorted(unique_sources):
                answer += f"\n  • {source}"

        return chat_history + [[question, answer]]

    except Exception as e:
        error_msg = f"❌ Error generating answer: {str(e)}"
        return chat_history + [[question, error_msg]]
| |
|
def clear_data():
    """Reset all document state so the user can start over from scratch.

    Returns:
        A 3-tuple feeding the processing-status textbox, the chat-status
        textbox, and the (now emptied) chatbot history.
    """
    global vectorstore, processed_files_list
    vectorstore, processed_files_list = None, []
    return "🗑️ All data cleared. Please upload new documents.", "", []
| |
|
| | |
# CSS injected into the Gradio Blocks app: gradient-filled title text,
# a muted centered subtitle, and a centered max-width page container.
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
#subtitle {
    text-align: center;
    color: #666;
    font-size: 1.2em;
    margin-bottom: 20px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""
| |
|
| | |
# ---------------------------------------------------------------------------
# Gradio UI: two-column layout — document upload / processing controls on the
# left, the chat interface on the right — followed by the event wiring.
# NOTE(review): several UI strings contain mojibake from a bad re-encoding of
# the original emoji; the intended glyphs cannot be recovered from this copy.
# ---------------------------------------------------------------------------
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    gr.HTML("<h1 id='title'>π Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")

    with gr.Row():
        # Left column: upload, process, status, and data-reset controls.
        with gr.Column(scale=1):
            gr.Markdown("### π Document Upload")
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("π Process Documents", variant="primary", size="lg")
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("ποΈ Clear All Data", variant="stop")

            gr.Markdown("""
            ---
            ### βΉοΈ How to Use
            1. **Upload PDFs** using the file uploader
            2. Click **Process Documents**
            3. **Ask questions** in the chat
            4. Get **AI-powered answers**
            **Features:**
            - π Multiple PDF support
            - π€ Powered by Groq LLM
            - π Semantic search
            - πΎ Chat history
            """)

        # Right column: status line, chat history, and the question input.
        with gr.Column(scale=2):
            gr.Markdown("### π¬ Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="β οΈ Upload and process documents to start",
                interactive=False
            )
            # NOTE(review): list-of-pairs Chatbot history is the legacy
            # "tuples" format — confirm the installed Gradio version still
            # accepts it (newer releases prefer the "messages" format).
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("π Ask", variant="primary", scale=1)

            clear_chat_btn = gr.Button("π§Ή Clear Chat")

    # Footer credits.
    gr.HTML("""
    <div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
        <p>Powered by Langchain, Groq, and HuggingFace | Built with β€οΈ using Gradio</p>
    </div>
    """)

    # Event wiring — build the vector store from the uploaded files.
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )

    # Ask via button click, then clear the input box.
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Same flow when the user presses Enter in the textbox.
    question_input.submit(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Clear only the visible chat history (vector store is untouched).
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

    # Clear everything: vector store, file list, status boxes, and chat.
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
| |
|
| | |
| | if __name__ == "__main__": |
| | demo.launch( |
| | share=True, |
| | server_name="0.0.0.0", |
| | server_port=7860 |
| | ) |