Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_groq import ChatGroq | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.memory import ConversationBufferMemory | |
# --- 1. SETUP API ---
# The Groq key is configured as a Hugging Face secret and read from the
# GROQ_API environment variable; api_key is None when the variable is unset.
api_key = os.getenv("GROQ_API")
| # --- 2. FILE LOADING LOGIC --- | |
def load_any(path: str):
    """Load one file into LangChain documents, picking a loader by extension.

    The extension match is case-insensitive. ``.pdf`` files go through
    ``PyPDFLoader``, ``.txt`` files through ``TextLoader`` (read as UTF-8)
    and ``.docx`` files through ``Docx2txtLoader``.

    Args:
        path: Filesystem path of the file to load.

    Returns:
        Whatever the matching loader's ``load()`` returns, or an empty list
        when the extension is not one of the three handled above.
    """
    lowered = path.lower()
    # Loader construction sits behind lambdas so that only the loader whose
    # suffix matches is ever instantiated.
    loader_factories = (
        (".pdf", lambda: PyPDFLoader(path)),
        (".txt", lambda: TextLoader(path, encoding="utf-8")),
        (".docx", lambda: Docx2txtLoader(path)),
    )
    for suffix, make_loader in loader_factories:
        if lowered.endswith(suffix):
            return make_loader().load()
    return []
| # --- 3. PROCESSING FUNCTION --- | |
| # This function runs when the user clicks "Build Chatbot" | |
def process_files(files):
    """Build a conversational RAG chain from the uploaded files.

    Runs when the user clicks "Build Chatbot". The files are loaded with
    ``load_any``, split into overlapping chunks, embedded into a FAISS index
    and wrapped in a ``ConversationalRetrievalChain`` backed by a Groq chat
    model with its own conversation memory.

    Args:
        files: The uploads handed over by the Gradio file input. Each item
            may be a plain path string or an object exposing its path as
            ``.name``.

    Returns:
        A ``(chain, status)`` tuple. ``chain`` is the ready-to-use chain, or
        ``None`` when nothing could be built; ``status`` is a human-readable
        message for the status box.
    """
    if not files:
        return None, "⚠️ Please upload at least one file."
    if not api_key:
        return None, "❌ Error: GROQ_API key not found in Secrets."

    try:
        # Load documents. Uploads may arrive as path strings or as file-like
        # objects carrying the path in ``.name``; accept both.
        docs = []
        for file_obj in files:
            docs.extend(load_any(getattr(file_obj, "name", file_obj)))
        if not docs:
            return None, "⚠️ No readable text found in files."

        # Split text into overlapping chunks for retrieval.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # Documents with no extractable text (e.g. image-only PDFs) yield
            # no chunks; an empty list cannot be indexed, so report it with
            # the same warning instead of an opaque vector-store error.
            return None, "⚠️ No readable text found in files."

        # Create embeddings and the vector store.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = FAISS.from_documents(chunks, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 4})

        # Create the chain. output_key="answer" is set on both the memory and
        # the chain because the chain also returns source documents.
        llm = ChatGroq(
            groq_api_key=api_key,
            model="llama-3.3-70b-versatile",
            temperature=0,
        )
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            memory=memory,
            return_source_documents=True,
            output_key="answer",
        )
        return chain, f"✅ Success! Processed {len(chunks)} chunks. You can chat now."
    except Exception as e:
        # UI boundary: surface any loader/embedding/LLM failure in the status
        # box rather than crashing the event handler.
        return None, f"❌ Error: {str(e)}"
| # --- 4. CHAT FUNCTION --- | |
def chat_function(message, history, chain):
    """Answer one chat message with the RAG chain and list its sources.

    Args:
        message: The user's question.
        history: Chat history passed in by the chat UI. It is not used here;
            the chain is invoked with the question only.
        chain: The chain produced by ``process_files``, or a falsy value if
            no chatbot has been built yet.

    Returns:
        A Markdown string: the answer, followed by a "Sources" section when
        the chain returned source documents. On failure, an error message
        string is returned instead of raising.
    """
    if not chain:
        return "⚠️ Please upload files and click 'Build Chatbot' first."

    preview_len = 100
    try:
        res = chain.invoke({"question": message})
        answer = res["answer"]

        # Format sources: file name plus a one-line preview of the chunk.
        sources = []
        for d in res.get("source_documents") or []:
            src = os.path.basename(d.metadata.get("source", "unknown"))
            flat = (d.page_content or "").replace("\n", " ")
            # Only mark the preview as elided when text was actually cut.
            text = flat[:preview_len] + "..." if len(flat) > preview_len else flat
            sources.append(f"- {src}: {text}")

        # Skip the section entirely rather than emit an empty "Sources:"
        # heading when nothing was retrieved.
        if not sources:
            return answer
        return answer + "\n\n---\n**Sources:**\n" + "\n".join(sources)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        return f"❌ Error generating answer: {str(e)}"
| # --- 5. BUILD UI --- | |
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# 📚 RAG Chatbot (LangChain + Groq)")

    # gr.State slot that carries the built RAG chain between events; it
    # starts as None until "Build Chatbot" has succeeded.
    chain_state = gr.State(None)

    with gr.Row():
        # Left column: upload, build trigger and build status.
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload PDF/TXT/DOCX",
                file_count="multiple",
            )
            build_btn = gr.Button("Build Chatbot", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False)

        # Right column: the chat itself. The chain held in chain_state is
        # forwarded to chat_function as its extra (third) argument.
        with gr.Column(scale=2):
            chatbot = gr.ChatInterface(
                fn=chat_function,
                additional_inputs=[chain_state],
            )

    # Clicking "Build Chatbot" runs process_files on the uploads and writes
    # its (chain, status) result into the state slot and the status box.
    build_btn.click(
        fn=process_files,
        inputs=[file_input],
        outputs=[chain_state, status_output],
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()