Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DSQjlXwb4UHeF4RILlBwwHfN5jRyyvov
"""
import gradio as gr
import pandas as pd
import os
from tempfile import NamedTemporaryFile
from uuid import uuid4

# LangChain & Groq imports for embedding and LLM access
from langchain_community.document_loaders import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq

# ========== API Key Setup ==========
# The Groq key must come from the Space's secret store; fail fast when absent or blank.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not (GROQ_API_KEY and GROQ_API_KEY.strip()):
    raise ValueError("❌ Please provide GROQ_API_KEY as a Space secret (Settings > Secrets).")

# ========== Global State ==========
# Shared across Gradio callbacks: the active Chroma index and the loaded DataFrame.
vectorstore = None
df = None
def safe_delete_collection(vs):
    """Best-effort removal of a Chroma collection so stale rows are never retrieved."""
    if vs is None or not hasattr(vs, "delete_collection"):
        return
    try:
        vs.delete_collection()
    except Exception:
        # The collection may already be gone; cleanup failures are ignored on purpose.
        pass
def load_and_index_csv(file):
    """
    Load an uploaded CSV, (re)build the Chroma vectorstore, and return UI outputs.

    Parameters
    ----------
    file : gradio file object with a ``.name`` path, or None

    Returns
    -------
    tuple
        (preview_html or None, status message str, enable_chat flag or None)
    """
    global vectorstore, df
    if file is None:
        return None, "⚠️ Please upload a CSV.", None
    tmp_path = None
    try:
        # Remove the prior index/collection and state so answers never mix uploads.
        safe_delete_collection(vectorstore)
        vectorstore = None
        df = None
        # Load the user's CSV, then re-serialize it so CSVLoader sees a clean file.
        df = pd.read_csv(file.name)
        csv_text = df.to_csv(index=False)
        with NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            tmp.write(csv_text.encode("utf-8"))
            tmp_path = tmp.name
        # Create a new index with a unique collection name for this upload.
        loader = CSVLoader(file_path=tmp_path)
        docs = loader.load()
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'}
        )
        collection_name = f"csvpal-{uuid4().hex[:8]}"
        vectorstore = Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            collection_name=collection_name
        )
        preview_html = df.head().to_html(index=False)
        status_msg = f"✅ CSV Loaded! {len(df)} rows, {len(df.columns)} columns."
        return preview_html, status_msg, True
    except Exception as e:
        return None, f"Error loading CSV: {e}", False
    finally:
        # Bug fix: NamedTemporaryFile(delete=False) was never removed, leaking
        # one temp CSV per upload. Clean it up once indexing is done.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
def answer_question(user_input):
    """
    Answer a question grounded strictly in the top-10 rows retrieved from the CSV.

    Returns a user-facing string: the model's answer, or a warning/error message.
    """
    global vectorstore, df
    if vectorstore is None or df is None:
        return "⚠️ Please upload and load a CSV first."
    if not user_input or not user_input.strip():
        return "⚠️ Please enter a question."
    # Retrieve the most relevant rows so the LLM answers only from the data.
    try:
        docs_and_scores = vectorstore.similarity_search_with_score(user_input, k=10)
    except Exception as e:
        return f"❌ Retrieval error: {e}"
    if not docs_and_scores:
        return "⚠️ No relevant data found for your question in the CSV."
    # Concatenate the retrieved row texts; scores are not used in the prompt.
    try:
        snippets = [
            getattr(doc, "page_content", getattr(doc, "content", ""))
            for doc, _ in docs_and_scores
        ]
        context = "\n".join(snippets)
    except Exception:
        context = ""
    prompt = f"""You are an expert data analyst AI assistant. Use ONLY the following CSV data to answer the question below.
If the answer cannot be found in the data, say 'No sufficient data to answer.'
Do NOT guess or invent information.
Data:
{context}
Question:
{user_input}
Please answer clearly and provide Python pandas code using DataFrame `df` to reproduce the answer exactly.
"""
    try:
        chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
        response = chat_model.invoke(prompt)
        # Normalize the response: plain string, message object, or anything else.
        if isinstance(response, str):
            return response
        if hasattr(response, "content"):
            return response.content
        return str(response)
    except Exception as e:
        return f"❌ Error generating answer: {e}"
# ========== Gradio UI ==========
with gr.Blocks(title="CsvPal-AI: Chat with Your CSV (RAG-powered Q&A)") as demo:
    gr.Markdown(
        """
# 📊 CsvPal-AI
Upload a CSV file and ask natural language questions.
Receive clear, data-grounded answers and Python pandas code snippets.
"""
    )
    with gr.Row():
        csv_file = gr.File(label="📂 Upload CSV", file_types=[".csv"])
        status = gr.Textbox(label="Status", interactive=False, max_lines=3)
    df_preview = gr.HTML(label="🧾 CSV Preview")
    gr.Markdown(
        "⚠️ Note: Answers are based on the top 10 most relevant rows retrieved from your CSV for each question."
    )
    gr.Markdown("----")
    user_input = gr.Textbox(
        label="📝 Ask your question:",
        placeholder="e.g. What analysis can be done in the CSV data?",
        interactive=False,
        lines=1
    )
    with gr.Row():
        ask_btn = gr.Button("🤖 Get Answer", interactive=False)
        clear_btn = gr.Button("Clear")
    answer_output = gr.Textbox(
        label="Answer",
        lines=15,
        interactive=False
    )

    def clear_all_from_file():
        """Reset preview, status, question box, and answer when the file widget is cleared."""
        return "", "", gr.update(value="", interactive=False), gr.update(interactive=False), ""

    def on_file_upload(file):
        """Index the freshly uploaded CSV and enable the chat controls on success."""
        preview, msg, enable_chat = load_and_index_csv(file)
        chat_on = bool(enable_chat)
        return (
            preview or "",
            msg,
            gr.update(value="", interactive=chat_on),
            gr.update(interactive=chat_on),
            ""  # answer_output
        )

    def clear_everything():
        """Global clear: wipe the vector index, the DataFrame, and every UI element."""
        global vectorstore, df
        safe_delete_collection(vectorstore)
        vectorstore = None
        df = None
        return (
            None,                                    # csv_file (clears)
            "",                                      # df_preview
            "",                                      # status
            gr.update(value="", interactive=False),  # user_input clear+disable
            gr.update(interactive=False),            # ask_btn disable
            ""                                       # answer_output clear
        )

    csv_file.upload(
        on_file_upload,
        inputs=[csv_file],
        outputs=[df_preview, status, user_input, ask_btn, answer_output]
    )
    csv_file.clear(
        clear_all_from_file,
        None,
        [df_preview, status, user_input, ask_btn, answer_output]
    )
    ask_btn.click(answer_question, inputs=[user_input], outputs=[answer_output])
    user_input.submit(answer_question, inputs=[user_input], outputs=[answer_output])
    clear_btn.click(
        clear_everything,
        inputs=[],
        outputs=[csv_file, df_preview, status, user_input, ask_btn, answer_output]
    )

demo.launch()