Spaces:
Build error
Build error
| import os | |
| import asyncio | |
| import hashlib | |
| from dotenv import load_dotenv | |
| import gradio as gr | |
| # Load environment variables | |
| load_dotenv() | |
| GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") | |
| from langchain_pypdf import PyPDFLoader # Fix: was langchain_community | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.prompts import PromptTemplate # Fix: replaces load_qa_chain | |
| from langchain_core.output_parsers import StrOutputParser # Fix: replaces load_qa_chain | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
# Instantiate the embeddings model a single time, at import, so that every
# PDF upload reuses this object instead of paying the model-load cost again.
print("LOG: Pre-initializing HuggingFace Embeddings model...")
EMBEDDINGS = HuggingFaceEmbeddings(
    model_kwargs={"device": "cpu"},
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
print("LOG: Embeddings pipeline ready.")
def build_vectorstore_gradio(uploaded_file):
    """Index an uploaded PDF and return the values for the four bound outputs.

    Args:
        uploaded_file: Value delivered by the ``gr.File`` component. ``None``
            means no file is selected; otherwise an object whose ``name``
            attribute is the path of the uploaded file (a plain path string
            is accepted as well).

    Returns:
        A 4-tuple ``(state, status_html, query_box_update, accordion_update)``.
        ``state`` is ``(chunks, vectorstore)`` on success and ``None`` when no
        file is selected or processing failed. ``status_html`` is the markup
        for the status panel. The two ``gr.update`` values enable/disable the
        question box and show/hide the sources accordion.
    """
    import html  # local import: only needed to escape text placed in HTML

    if uploaded_file is None:
        return (
            None,
            """
            <div style="background: rgba(255, 255, 255, 0.02); border: 1px dashed rgba(255, 255, 255, 0.1); border-radius: 8px; padding: 12px; text-align: center;">
            <p style="color: #8A8D9F; margin: 0; font-size: 0.9rem;">Waiting for PDF Upload...</p>
            </div>
            """,
            gr.update(interactive=False, placeholder="Upload a PDF on the left first to enable questioning..."),
            gr.update(visible=False),
        )

    # Accept either an object exposing ``.name`` or a bare path string.
    file_path = getattr(uploaded_file, "name", uploaded_file)
    file_name = os.path.basename(file_path)

    try:
        # The MD5 digest is only used to identify the upload in the logs.
        # Hash in fixed-size blocks so a large PDF is not loaded into memory
        # just for this, and keep the read inside the ``try`` so an unreadable
        # file is reported in the status panel instead of crashing the handler.
        digest = hashlib.md5()
        with open(file_path, "rb") as f:
            for block in iter(lambda: f.read(1 << 16), b""):
                digest.update(block)
        file_hash = digest.hexdigest()
        print(f"LOG: Parsing PDF '{file_name}' (Hash: {file_hash})...")

        docs = PyPDFLoader(file_path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=75)
        chunks = splitter.split_documents(docs)
        print(f"LOG: Splitting complete. Total chunks created: {len(chunks)}")

        # With no chunks there is nothing to index; fail with a message the
        # user can act on rather than whatever the vector store would raise.
        if not chunks:
            raise ValueError(
                "No extractable text was found in this PDF "
                "(it may be a scanned or image-only document)."
            )

        vectorstore = FAISS.from_documents(chunks, EMBEDDINGS)
        print("LOG: Vector store indexing finished.")

        size_kb = os.path.getsize(file_path) / 1024
        # The file name comes from the uploader, so escape it before it is
        # embedded in markup.
        status_msg = f"""
        <div style="background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981; border-radius: 8px; padding: 12px; margin-top: 10px;">
        <p style="color: #10b981; margin: 0; font-weight: 600;">✅ Index Built Successfully!</p>
        <p style="color: #cbd5e1; margin: 4px 0 0 0; font-size: 0.85rem;">
        <b>Document:</b> {html.escape(file_name)} <br>
        <b>Size:</b> {size_kb:.1f} KB <br>
        <b>Total Segments:</b> {len(chunks)} Chunks
        </p>
        </div>
        """
        return (
            (chunks, vectorstore),
            status_msg,
            gr.update(interactive=True, placeholder="Ask a question about the document..."),
            gr.update(visible=True),
        )
    except Exception as e:
        # UI boundary: any failure is turned into an error panel. The message
        # is escaped because exception text can echo file names or content.
        print(f"LOG: Failed to process PDF '{file_name}': {e}")
        err_msg = f"""
        <div style="background: rgba(239, 68, 68, 0.1); border: 1px solid #ef4444; border-radius: 8px; padding: 12px; margin-top: 10px;">
        <p style="color: #ef4444; margin: 0; font-weight: 600;">⚠️ Failed to process PDF</p>
        <p style="color: #cbd5e1; margin: 4px 0 0 0; font-size: 0.85rem;">{html.escape(str(e))}</p>
        </div>
        """
        return None, err_msg, gr.update(interactive=False), gr.update(visible=False)
def answer_question_gradio(question, state, api_key_input):
    """Answer a question about the indexed PDF and list the passages used.

    Args:
        question: Text typed into the question box.
        state: ``(chunks, vectorstore)`` for the active document, or a falsy
            value when no document has been indexed.
        api_key_input: Optional Google API key typed by the user. When blank,
            the module-level ``GOOGLE_API_KEY`` is used instead.

    Returns:
        A 2-tuple ``(answer_markdown, sources_html)``. On any validation or
        runtime error the first element is a user-facing message and the
        second is an empty string.
    """
    import html  # local import: only needed to escape text placed in HTML

    # Treat a missing value the same as an empty one instead of raising on
    # ``None.strip()``.
    api_key = (api_key_input or "").strip() or GOOGLE_API_KEY
    if not api_key:
        return (
            "⚠️ **Error:** `GOOGLE_API_KEY` is missing. Please add it to your `.env` file or enter it in the input field.",
            "",
        )
    if not state:
        return "⚠️ **Error:** No active document. Please upload a PDF file first.", ""
    if not (question or "").strip():
        return "⚠️ Please type in a question.", ""

    # Only the vector store is needed here; the chunk list is not used.
    _, vectorstore = state
    try:
        # Retrieve the passages most similar to the question.
        relevant_docs = vectorstore.similarity_search(question, k=4)

        llm = ChatGoogleGenerativeAI(
            google_api_key=api_key,
            model="gemini-2.5-flash",
            temperature=0.3,
            # No client-side retries; failures are reported to the user below.
            max_retries=0,
        )
        prompt = PromptTemplate.from_template(
            "Use the following context to answer the question as accurately as possible.\n\n"
            "Context:\n{context}\n\n"
            "Question: {question}\n\n"
            "Answer:"
        )
        chain = prompt | llm | StrOutputParser()
        context = "\n\n".join(doc.page_content for doc in relevant_docs)
        answer = chain.invoke({"context": context, "question": question})

        # Render one citation card per retrieved passage.
        source_cards = []
        for i, doc in enumerate(relevant_docs, 1):
            # Displayed 1-based; presumably the loader's "page" metadata is
            # 0-based - confirm against the loader in use.
            page = doc.metadata.get("page", 0) + 1
            # Passage text comes from the uploaded PDF, so escape it before
            # embedding it in markup.
            excerpt = html.escape(doc.page_content.strip())
            source_cards.append(f"""
            <div style="background: rgba(255, 255, 255, 0.02); border-left: 3px solid #8b5cf6; padding: 10px; margin-bottom: 12px; border-radius: 6px; border: 1px solid rgba(255, 255, 255, 0.04);">
            <span style="font-weight: 600; color: #a78bfa; font-size: 0.82rem; background: rgba(139, 92, 246, 0.1); padding: 2px 6px; border-radius: 4px;">Chunk {i} — Page {page}</span>
            <p style="margin: 6px 0 0 0; font-size: 0.88rem; color: #cbd5e1; font-style: italic; line-height: 1.4;">
            "...{excerpt}..."
            </p>
            </div>
            """)
        return answer, "".join(source_cards)
    except Exception as e:
        # UI boundary: convert any failure into a message for the answer box.
        err = str(e)
        if "429" in err or "quota" in err.lower():
            return (
                "⚠️ **Rate Limit Hit:** You have exceeded the free tier quota for Gemini API. Please wait a minute and try again.",
                "",
            )
        return f"⚠️ **Error:** {err}", ""
# Custom premium CSS styling (DocuMind theme).
# This stylesheet is handed to ``demo.launch(css=custom_css)`` in the
# ``__main__`` guard. ``.header-container`` matches the class on the header
# <div> emitted by the layout; ``button.primary`` presumably targets the
# button created with ``variant="primary"`` - confirm against the rendered DOM.
custom_css = """
body {
background-color: #0d0e15 !important;
color: #f3f4f6 !important;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
font-family: 'Plus Jakarta Sans', -apple-system, sans-serif !important;
}
/* Title and Header styling */
.header-container {
text-align: center;
padding: 2rem 0;
margin-bottom: 1.5rem;
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
}
.header-container h1 {
font-size: 3rem;
font-weight: 800;
background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 50%, #06B6D4 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
letter-spacing: -1.5px;
margin: 0;
}
.header-container p {
color: #8A8D9F;
font-size: 1.15rem;
margin: 8px 0 0 0;
}
/* Primary Button Gradient */
button.primary {
background: linear-gradient(135deg, #8B5CF6 0%, #3B82F6 100%) !important;
color: white !important;
border: none !important;
font-weight: 600 !important;
transition: all 0.2s ease !important;
border-radius: 8px !important;
}
button.primary:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(139, 92, 246, 0.35) !important;
}
"""
# UI layout and event wiring for the DocuMind app.
# Left column: PDF upload, indexing status and optional API-key override.
# Right column: question box, answer output and source citations.
with gr.Blocks(title="DocuMind AI") as demo:
    # Placeholder texts shared by the components and the "clear" handler so
    # the initial and the reset state cannot drift apart.
    answer_placeholder = "*(Upload a PDF and submit your question to view the AI output here.)*"
    sources_placeholder = "No document sources loaded."

    gr.HTML(
        """
        <div class="header-container">
        <h1>DocuMind AI</h1>
        <p>Interactive PDF Question Answering Powered by LangChain, FAISS & Gemini</p>
        </div>
        """
    )

    # Per-session state holding ``(chunks, vectorstore)`` for the active PDF.
    doc_state = gr.State(None)

    with gr.Row():
        # Sidebar / left column
        with gr.Column(scale=1, min_width=320):
            gr.Markdown("### 📄 Upload Document")
            pdf_file = gr.File(
                label="Select or Drop PDF File",
                file_types=[".pdf"],
                file_count="single",
            )
            status_panel = gr.HTML(
                """
                <div style="background: rgba(255, 255, 255, 0.02); border: 1px dashed rgba(255, 255, 255, 0.1); border-radius: 8px; padding: 12px; text-align: center;">
                <p style="color: #8A8D9F; margin: 0; font-size: 0.9rem;">Waiting for PDF Upload...</p>
                </div>
                """
            )
            gr.Markdown("### 🔑 API Authentication")
            api_key_box = gr.Textbox(
                label="Google API Key Override (Optional)",
                placeholder="AIzaSy... (Leave empty to use .env key)",
                type="password",
            )
            gr.Markdown(
                """
                <div style="font-size: 0.82rem; color: #8A8D9F; margin-top: 15px; border-top: 1px solid rgba(255, 255, 255, 0.05); padding-top: 10px; line-height: 1.5;">
                💡 <b>Private & Local Processing:</b><br>
                Your document is parsed locally inside the workspace environment. The FAISS vector database operates securely in memory.
                </div>
                """
            )

        # Workspace panel / right column
        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            # Starts disabled; the upload handler enables it once a PDF has
            # been indexed.
            query_box = gr.Textbox(
                label="Ask a question about the document:",
                placeholder="Upload a PDF on the left first to enable questioning...",
                interactive=False,
                lines=2,
            )
            with gr.Row():
                submit_btn = gr.Button("Submit Question", variant="primary")
                clear_btn = gr.Button("Clear Workspace")
            gr.Markdown("### 💡 Answer Output")
            answer_box = gr.Markdown(answer_placeholder)
            sources_accordion = gr.Accordion(
                "📚 Relevant Source Citations Used", open=True, visible=False
            )
            with sources_accordion:
                sources_box = gr.HTML(sources_placeholder)

    # --- Event wiring -----------------------------------------------------

    # (Re)build the index whenever the selected file changes or is cleared.
    pdf_file.change(
        fn=build_vectorstore_gradio,
        inputs=[pdf_file],
        outputs=[doc_state, status_panel, query_box, sources_accordion],
    )

    def clear_workspace():
        """Reset the question, answer and sources; the indexed PDF is kept."""
        return "", answer_placeholder, sources_placeholder

    clear_btn.click(
        fn=clear_workspace,
        inputs=[],
        outputs=[query_box, answer_box, sources_box],
    )

    # Clicking the button and pressing Enter in the question box run the same
    # handler with the same inputs and outputs.
    ask_inputs = [query_box, doc_state, api_key_box]
    ask_outputs = [answer_box, sources_box]
    for ask_trigger in (submit_btn.click, query_box.submit):
        ask_trigger(
            fn=answer_question_gradio,
            inputs=ask_inputs,
            outputs=ask_outputs,
        )
if __name__ == "__main__":
    # Install a freshly created event loop as the current one before launch.
    fresh_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(fresh_loop)

    # Serve on localhost port 7860 only, without a public share link, using
    # the default theme plus the custom stylesheet defined above.
    launch_options = {
        "server_name": "127.0.0.1",
        "server_port": 7860,
        "share": False,
        "theme": gr.themes.Default(),
        "css": custom_css,
    }
    demo.launch(**launch_options)