Spaces:
Sleeping
Sleeping
import io
import os
import re

import faiss
import gradio as gr
import numpy as np
import requests
from openai import OpenAI
from pypdf import PdfReader
from pypdf.errors import PyPdfError
from sentence_transformers import SentenceTransformer
# Globals (module-level state shared by every Gradio callback in this process)

# Sentence-embedding model used to encode both the PDF chunks and the queries.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# FAISS index over the chunk embeddings; stays None until load_pdf_from_link
# has built one.
index = None
# Text chunks, in the same order as the vectors added to `index`.
chunks = []
# Conversation memory: one {"user": ..., "bot": ...} dict per completed turn.
chat_history = []  # Session memory
def chat(user_input, history):
    """Answer one user message, passing the recent conversation as context.

    The last five turns stored in the module-level ``chat_history`` list are
    rendered as a plain-text transcript and handed to ``generate_answer``
    together with the new message. The finished turn is then appended to
    ``chat_history``.

    Args:
        user_input: The message typed by the user.
        history: Messages currently shown in the chat UI, as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty list.

    Returns:
        A 2-tuple containing the extended message list twice (the function
        has always returned the same list for both of its outputs). A blank
        message returns the history unchanged without calling the model.

    NOTE(review): this file defines ``chat`` and ``generate_answer`` a second
    time further down; those later definitions rebind the names at import
    time, so this memory-aware version is only used if they are removed.
    """
    shown = list(history or [])
    if not user_input or not user_input.strip():
        # Nothing to ask: avoid a pointless retrieval and model call.
        return shown, shown

    # Turns are stored below under the "user"/"bot" keys, so they must be read
    # back with the same keys (the previous 'content'/'bot_response' lookups
    # raised KeyError as soon as one turn had been recorded).
    transcript = "\n".join(
        f"User: {turn['user']}\nBot: {turn['bot']}" for turn in chat_history[-5:]
    )
    answer = generate_answer(user_input, transcript)

    # Store in memory (the list is mutated in place, never rebound).
    chat_history.append({"user": user_input, "bot": answer})

    # Update UI history
    new_history = shown + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": answer},
    ]
    return new_history, new_history
def generate_answer(query, conversation_context=""):
    """Answer *query* from the loaded PDF, with optional conversation memory.

    The chunks returned by ``retrieve(query)`` are joined into a context
    section, combined with ``conversation_context`` and the question into a
    single user message, and sent to the chat-completions endpoint through
    the module-level ``client``.

    Args:
        query: The user's question.
        conversation_context: Plain-text transcript of earlier turns; may be
            empty.

    Returns:
        The model's reply text, or a warning string when no PDF has been
        loaded yet (``index`` is ``None``).

    NOTE(review): a one-argument ``generate_answer(query)`` is defined again
    later in this file and rebinds this name at import time.
    """
    if index is None:
        # The original literal here was mojibake of the warning-sign emoji.
        return "⚠️ Please load a PDF first."

    rag_text = "\n\n".join(retrieve(query))

    # Combine RAG context + conversation memory. The pieces below produce the
    # same prompt text as the former triple-quoted template.
    full_prompt = (
        "You are a smart financial AI assistant that remembers conversations.\n"
        "Previous conversation:\n"
        f"{conversation_context}\n"
        "PDF Context (use ONLY this for facts):\n"
        f"{rag_text}\n"
        f"Question: {query}\n"
        "Respond naturally and helpfully, referencing past discussion when relevant."
    )

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[{"role": "user", "content": full_prompt}],
        temperature=0.7,
        max_tokens=600,
    )
    # `content` is optional in the response schema; never hand None to the UI.
    return response.choices[0].message.content or ""
# Groq client: the stock OpenAI SDK pointed at Groq's OpenAI-style endpoint.
# The key comes from the GROQ_API_KEY environment variable (configured as an
# HF secret); the lookup yields None when the variable is not set.
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ.get("GROQ_API_KEY"),
)
def convert_drive_link(link):
    """Turn a Google Drive share link into a direct-download URL.

    The file id is taken from the path segment that follows ``/d/``. The id
    ends at the next ``/``, ``?``, ``#``, ``&`` or whitespace, so links such
    as ``.../d/<id>?usp=sharing`` (no slash after the id) no longer leak the
    query string into the id.

    Args:
        link: The URL pasted by the user.

    Returns:
        ``https://drive.google.com/uc?id=<id>`` when a ``/d/<id>`` segment is
        found; otherwise *link* is returned unchanged (this includes
        non-string values, which the previous catch-all handler also passed
        through).
    """
    if not isinstance(link, str):
        return link

    match = re.search(r"/d/([^/?#&\s]+)", link)
    if match is None:
        return link
    return f"https://drive.google.com/uc?id={match.group(1)}"
def _chunk_words(text, size=500):
    """Split *text* on whitespace into consecutive pieces of at most *size* words."""
    words = text.split()
    return [" ".join(words[i:i + size]) for i in range(0, len(words), size)]


def load_pdf_from_link(link):
    """Download a PDF, chunk its text and build the FAISS index over it.

    On success the module-level ``index`` and ``chunks`` are replaced
    together. On any handled failure they are left exactly as they were, so
    a previously loaded PDF stays queryable.

    Args:
        link: URL of the PDF; Google Drive share links are rewritten by
            ``convert_drive_link`` first.

    Returns:
        A status string for the UI: a success message with the chunk count,
        or a warning describing why the PDF could not be loaded.
    """
    global index, chunks

    url = convert_drive_link(link)
    try:
        # A timeout keeps a stalled server from hanging the callback forever;
        # raise_for_status stops an HTTP error page from being parsed as PDF.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"⚠️ Could not download the PDF: {exc}"

    try:
        # Parse from memory rather than a fixed "temp.pdf" path, which every
        # concurrent caller would otherwise overwrite.
        reader = PdfReader(io.BytesIO(response.content))
        # Extract each page once (the old filter ran extract_text twice).
        page_texts = [page.extract_text() for page in reader.pages]
    except PyPdfError as exc:
        return f"⚠️ Could not read the PDF: {exc}"

    # Chunking: 500-word pieces, never spanning a page boundary.
    new_chunks = []
    for text in page_texts:
        if text:
            new_chunks.extend(_chunk_words(text))

    if not new_chunks:
        # Without this guard the embedding array is empty and reading its
        # second dimension fails.
        return "⚠️ No extractable text found in this PDF."

    # Embeddings + FAISS
    embeddings = np.ascontiguousarray(embed_model.encode(new_chunks), dtype="float32")
    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish both globals only once everything succeeded, so `index` and
    # `chunks` can never describe different documents.
    index, chunks = new_index, new_chunks
    # The original literal here was mojibake of the check-mark emoji.
    return f"✅ PDF loaded! {len(chunks)} chunks created."
def retrieve(query, k=3):
    """Return up to *k* stored chunks closest to *query* in embedding space.

    Args:
        query: The text to search for.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings ordered as FAISS returned them; empty when no
        PDF has been loaded, when there are no chunks, or when ``k`` is not
        positive.
    """
    if index is None or not chunks:
        return []

    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with the label -1, which `chunks[-1]` would silently turn into
    # the last chunk.
    k = min(k, len(chunks))
    if k <= 0:
        return []

    q_emb = np.ascontiguousarray(embed_model.encode([query]), dtype="float32")
    _, indices = index.search(q_emb, k)
    # Defensive filter in case the index and `chunks` ever disagree in size.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
def generate_answer(query):
    """Answer *query* using only text retrieved from the loaded PDF.

    The chunks returned by ``retrieve(query)`` are joined into a context
    section, placed in a single user message with the question, and sent to
    the chat-completions endpoint through the module-level ``client``.

    Args:
        query: The user's question.

    Returns:
        The model's reply text, or a warning string when no PDF has been
        loaded yet (``index`` is ``None``).

    NOTE(review): this definition rebinds the name of an earlier
    ``generate_answer`` in this file that also accepted a conversation
    context.
    """
    if index is None:
        # The original literal here was mojibake of the warning-sign emoji.
        return "⚠️ Please load a PDF first."

    context_text = "\n\n".join(retrieve(query))

    # The pieces below produce the same prompt text as the former
    # triple-quoted template.
    prompt = (
        "You are a financial AI assistant.\n"
        "Answer ONLY using the context below.\n"
        "Context:\n"
        f"{context_text}\n"
        "Question:\n"
        f"{query}"
    )

    # Groq-hosted model; noted by the author as available as of April 2026.
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # Fast & reliable
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=500,
    )
    # `content` is optional in the response schema; never hand None to the UI.
    return response.choices[0].message.content or ""
def chat(user_input, history):
    """Answer one user message and append the exchange to the chat display.

    Args:
        user_input: The message typed by the user.
        history: Messages currently shown in the chat UI, as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty list.

    Returns:
        A 2-tuple containing the extended message list twice (the function
        has always returned the same list for both of its outputs). A blank
        message returns the history unchanged without calling the model.

    NOTE(review): this definition rebinds the name of an earlier ``chat`` in
    this file that also kept conversation memory.
    """
    shown = list(history or [])
    if not user_input or not user_input.strip():
        # Nothing to ask: avoid a pointless retrieval and model call.
        return shown, shown

    answer = generate_answer(user_input)
    new_history = shown + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": answer},
    ]
    return new_history, new_history
# UI layout and event wiring.
# NOTE(review): the nesting below is reconstructed; the link box and the load
# button are assumed to share the row -- confirm against the original file.
# NOTE(review): several labels contain mis-encoded characters (most likely
# emoji damaged in transit); they are kept as found and should be restored
# from the original file.
def _clear_textbox():
    """Return the empty string used to reset the message box after a submit."""
    return ""


with gr.Blocks(title="Finance RAG") as app:
    gr.Markdown(value="# π Dynamic Finance RAG Chatbot")

    with gr.Row():
        link_input = gr.Textbox(
            label="π Google Drive PDF Link",
            placeholder="https://drive.google.com/file/d/...",
        )
        load_btn = gr.Button(value="π₯ Load PDF", variant="primary")

    status = gr.Textbox(label="Status", interactive=False)
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        label="π¬ Ask about the PDF",
        placeholder="What are the key financial metrics?",
        container=True,
    )

    # Events
    load_btn.click(fn=load_pdf_from_link, inputs=link_input, outputs=status)
    # `chat` returns two values, hence the two output slots.
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[chatbot, chatbot])
    # Second listener on the same event: empties the message box.
    msg.submit(fn=_clear_textbox, outputs=msg)


if __name__ == "__main__":
    app.launch()