| import os |
| import time |
| import threading |
| import shutil |
| from pathlib import Path |
| import gradio as gr |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain.embeddings import HuggingFaceEmbeddings |
| from langchain_community.vectorstores import FAISS |
| from langchain.chains import ConversationalRetrievalChain, LLMChain |
| from langchain.memory import ConversationBufferMemory |
| from langchain_groq import ChatGroq |
| from langchain.prompts import PromptTemplate |
|
|
| |
# API key for the Groq-hosted LLM; None when the environment variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Working directories, relative to the current working directory.
TEMP_DIR = "temp_uploads"
VECTOR_DIR = "vector_stores"

# Create both working directories at import time if they are missing.
Path(TEMP_DIR).mkdir(parents=True, exist_ok=True)
Path(VECTOR_DIR).mkdir(parents=True, exist_ok=True)
|
|
class DocumentChat:
    """Question-answering session over a single uploaded PDF.

    The PDF is split into chunks, embedded with a HuggingFace sentence
    transformer, indexed with FAISS (also saved under ``VECTOR_DIR``) and
    queried through a ``ConversationalRetrievalChain`` backed by a Groq chat
    model.  A background thread deletes the index directory and the uploaded
    file after a delay.
    """

    # Default number of seconds uploaded data is kept before cleanup runs.
    CLEANUP_DELAY_SECONDS = 600

    # Inputs (compared lower-cased and stripped) that are answered by the
    # capability prompt instead of the document retrieval chain.
    GENERIC_QUESTIONS = frozenset({
        "what can you do?",
        "what are your capabilities?",
        "help",
        "what is this?",
        "how does this work?",
        "what are your functions?",
        "what do you do?",
        "how do i use this?",
        "instructions",
        "guide",
    })

    # Prompt used for the generic "what can you do?" style questions.
    GENERIC_PROMPT_TEXT = (
        "You are a helpful AI assistant that can:\n"
        "1. Read and understand PDF documents that users upload\n"
        "2. Answer questions about the contents of uploaded documents\n"
        "3. Maintain context through conversation\n"
        "4. Process documents and remember their contents for the duration of the chat\n"
        "5. Provide accurate and relevant information from the documents\n"
        "\n"
        "If the user asks: {question}\n"
        "\n"
        "Provide a clear and helpful response about your capabilities in a structured way.\n"
        "If the question is about the document and no document is uploaded yet, remind them to upload a document first.\n"
        "\n"
        "Remember to be friendly and professional in your response."
    )

    def __init__(self):
        """Create the embedding model, LLM, memory and the generic help chain."""
        # Per-document state; populated by process_file().
        self.chain = None
        self.db = None
        self.current_vector_store = None
        self.cleanup_timer = None

        self.embedding_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2"
        )

        self.llm = ChatGroq(
            api_key=GROQ_API_KEY,
            model_name="deepseek-r1-distill-llama-70b",
            temperature=0.7,
        )

        # output_key="answer" tells the memory which chain output to store,
        # since the retrieval chain also returns source documents.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            return_messages=True,
        )

        self.generic_template = PromptTemplate(
            input_variables=["question"],
            template=self.GENERIC_PROMPT_TEXT,
        )

        self.generic_chain = LLMChain(
            llm=self.llm,
            prompt=self.generic_template,
        )

    def cleanup_files(self, vector_store_path, pdf_path, delay=CLEANUP_DELAY_SECONDS):
        """Delete a document's on-disk data after ``delay`` seconds.

        Blocks the calling thread for ``delay`` seconds, so it is meant to be
        run on a background thread (see ``process_file``).

        Args:
            vector_store_path: Directory holding the saved FAISS index.
            pdf_path: Path of the uploaded PDF file.
            delay: Seconds to wait before deleting; defaults to 10 minutes.

        Errors are printed rather than raised.
        """
        time.sleep(delay)
        try:
            if os.path.exists(vector_store_path):
                shutil.rmtree(vector_store_path)

            if os.path.exists(pdf_path):
                os.remove(pdf_path)

            # Only drop the in-memory session if it still belongs to the store
            # that was just removed.  Otherwise a timer started for an earlier
            # upload (or one left over after reset()) would wipe a newer
            # document that the user is actively chatting with.
            if self.current_vector_store == vector_store_path:
                self.chain = None
                self.db = None
                self.current_vector_store = None

            print(f"Cleanup completed for: {pdf_path}")
        except Exception as e:
            print(f"Cleanup error: {str(e)}")

    def process_file(self, file_path):
        """Index a PDF and prepare the retrieval chain for questions.

        Args:
            file_path: Path to the uploaded PDF, or None when nothing was
                uploaded.

        Returns:
            A human-readable status message (success or error text).
        """
        if file_path is None:
            return "Please upload a file."

        try:
            # Nanosecond resolution keeps store directories distinct even for
            # uploads that arrive within the same second.
            timestamp = time.time_ns()
            vector_store_path = os.path.join(VECTOR_DIR, f"store_{timestamp}")

            documents = PyPDFLoader(file_path).load()

            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
            )
            docs = text_splitter.split_documents(documents)

            self.db = FAISS.from_documents(docs, self.embedding_model)
            self.db.save_local(vector_store_path)
            self.current_vector_store = vector_store_path

            # No combine_docs_chain_kwargs: the chain picks its default prompt.
            # (The previous explicit {"prompt": None} added nothing.)
            self.chain = ConversationalRetrievalChain.from_llm(
                llm=self.llm,
                retriever=self.db.as_retriever(
                    search_type="similarity",
                    search_kwargs={"k": 3},
                ),
                memory=self.memory,
                return_source_documents=True,
            )

            # Daemon thread so a pending cleanup never blocks interpreter exit.
            self.cleanup_timer = threading.Thread(
                target=self.cleanup_files,
                args=(vector_store_path, file_path),
                daemon=True,
            )
            self.cleanup_timer.start()

            return "✅ Document processed successfully! You can now ask questions. Note: The document and its data will be automatically deleted after 10 minutes."

        except Exception as e:
            return f"❌ Error processing document: {str(e)}"

    def chat(self, query):
        """Answer a user message.

        Generic "help"-style messages are answered by the capability prompt
        plus a status line; everything else goes to the document retrieval
        chain, which requires a processed document.

        Args:
            query: The user's message text.

        Returns:
            The answer text, a reminder to upload a document, or an error
            message if anything raised.
        """
        try:
            if query.lower().strip() in self.GENERIC_QUESTIONS:
                status = "I already have a document loaded and ready for questions." if self.chain else "No document is currently loaded. Please upload a PDF document first."
                result = self.generic_chain.run(question=query)
                return f"{result}\n\nCurrent Status: {status}"

            if self.chain is None:
                return ("Please upload and process a document first. "
                        "Click the 'Choose Files' button above to upload a PDF document.")

            result = self.chain({"question": query})
            return result['answer']

        except Exception as e:
            return f"Error processing your question: {str(e)}"

    def reset(self):
        """Reset the chat session.

        Removes the current vector store directory (if any), drops the chain
        and index, and clears the conversation memory.

        Returns:
            A human-readable status message (success or error text).
        """
        try:
            if self.current_vector_store and os.path.exists(self.current_vector_store):
                shutil.rmtree(self.current_vector_store)

            self.chain = None
            self.db = None
            self.current_vector_store = None
            self.memory.clear()

            return "Chat session has been reset. You can upload a new document."
        except Exception as e:
            return f"Error resetting chat session: {str(e)}"
|
|
def create_demo():
    """Build the Gradio Blocks interface for chatting with an uploaded PDF.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    # NOTE(review): one DocumentChat instance is created here and captured by
    # all callbacks, so every browser session shares the same document, chain
    # and memory. Consider per-session state if this is served to several users.
    doc_chat = DocumentChat()

    intro_markdown = (
        "\n"
        "# 📚 Document Chat Interface\n"
        "Upload a PDF document and chat with its contents. Files are automatically deleted after 10 minutes for privacy.\n"
        "\n"
        "## Instructions:\n"
        "1. Upload a PDF document using the file upload below\n"
        "2. Click 'Process Document' and wait for confirmation\n"
        "3. Start asking questions about your document\n"
        "4. Use 'Reset Chat' to start fresh with a new document\n"
    )

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_markdown)

        status_msg = gr.Textbox(label="Status", interactive=False)

        with gr.Row():
            # Left column: upload and session controls.
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Drop your PDF here",
                    file_types=[".pdf"],
                    type="filepath",
                )
                process_button = gr.Button("📄 Process Document", variant="primary")
                reset_button = gr.Button("🔄 Reset Chat", variant="secondary")

            # Right column: conversation.
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    label="Chat History",
                    height=400,
                    bubble_full_width=False,
                )
                msg = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask something about the document or type 'help' for assistance...",
                    lines=2,
                )
                send = gr.Button("🚀 Send", variant="primary")

        def user_message(message, history):
            """Append the (question, answer) pair to the history and clear the input box."""
            # reset_chat() hands None to the chatbot; tolerate that value (and
            # a missing message) coming back in instead of raising on
            # .append() / .strip().
            if history is None:
                history = []
            if not message or not message.strip():
                return "", history

            response = doc_chat.chat(message)
            history.append((message, response))
            return "", history

        def reset_chat():
            """Reset the backend session and clear the chat display."""
            return doc_chat.reset(), None

        process_button.click(
            fn=doc_chat.process_file,
            inputs=[file_input],
            outputs=[status_msg],
        )

        reset_button.click(
            fn=reset_chat,
            inputs=[],
            outputs=[status_msg, chatbot],
        )

        # Pressing Enter in the textbox and clicking Send do the same thing.
        for register in (msg.submit, send.click):
            register(
                fn=user_message,
                inputs=[msg, chatbot],
                outputs=[msg, chatbot],
            )

    return demo
|
|
if __name__ == "__main__":
    # Script entry point: build the interface and serve it with a public
    # share link, surfaced errors, and up to 40 worker threads.
    launch_options = {
        "share": True,
        "show_error": True,
        "max_threads": 40,
    }
    demo = create_demo()
    demo.launch(**launch_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
|