charesz committed on
Commit
c39a167
·
verified ·
1 Parent(s): 48cc861

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +46 -51
utils.py CHANGED
@@ -1,64 +1,59 @@
1
  # utils.py
2
  import pandas as pd
3
- from langchain_community.llms import HuggingFaceHub
4
- from langchain_experimental.agents import create_pandas_dataframe_agent
5
- from typing import Tuple
6
- import requests
7
 
8
def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
    """
    Reads a CSV, checks Hugging Face model access, builds a LangChain agent,
    and returns (answer, debug_info).

    Parameters:
        file_bytes: binary file-like object holding the CSV data.
        query: natural-language question to run against the DataFrame.
        hf_token: Hugging Face API token used for both the access probe
            and the HuggingFaceHub LLM.
        repo_id: model repository id on the HF Inference API.

    Returns:
        (answer, debug_info) tuple. On success debug_info is "";
        on most failures answer is "" and debug_info explains the error.
    """
    # --- Step 1: Read CSV ---
    try:
        try:
            df = pd.read_csv(file_bytes)
        except Exception:
            # First parse failed (commonly a non-UTF-8 file):
            # rewind the buffer and retry with latin1.
            file_bytes.seek(0)
            df = pd.read_csv(file_bytes, encoding="latin1")
    except Exception as e:
        # NOTE(review): this failure path puts the message in the FIRST tuple
        # slot, unlike every other error below which uses the second slot —
        # confirm callers expect this asymmetry.
        return f"Error reading CSV: {e}", ""

    # Cap the column count so the agent prompt stays a manageable size.
    MAX_COLS = 200
    if df.shape[1] > MAX_COLS:
        df = df.iloc[:, :MAX_COLS]

    # --- Step 2: Test Hugging Face token/model access ---
    # Cheap probe request so auth/permission problems surface before the
    # heavier agent machinery is constructed.
    try:
        test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
        test_headers = {"Authorization": f"Bearer {hf_token}"}
        test_payload = {"inputs": "Hello"}
        response = requests.post(test_url, headers=test_headers, json=test_payload)

        if response.status_code == 403:
            return "", f"Access Denied (403): Your token does not have permission to use {repo_id}."
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        return "", f"HTTP Error while accessing {repo_id}: {e}"
    except Exception as e:
        return "", f"Generic connection failure: {e}"

    # --- Step 3: Build the LLM ---
    try:
        llm = HuggingFaceHub(
            repo_id=repo_id,
            huggingfacehub_api_token=hf_token,
            # temperature 0.0 for deterministic answers; 512-token cap on output
            model_kwargs={"temperature": 0.0, "max_new_tokens": 512},
        )
    except Exception as e:
        return "", f"Error creating HuggingFaceHub LLM: {e}"

    # --- Step 4: Create pandas agent ---
    try:
        agent = create_pandas_dataframe_agent(llm, df, verbose=False)
    except Exception as e:
        return "", f"Error creating LangChain pandas agent: {e}"

    # --- Step 5: Run query ---
    try:
        answer = agent.run(query)
    except Exception as e:
        return "", f"Agent runtime error: {e}"

    return answer, ""
 
1
  # utils.py
2
  import pandas as pd
3
+ import streamlit as st
4
+ from huggingface_hub import InferenceClient
 
 
5
 
6
# Initialize Hugging Face Inference client using the secret
# NOTE: runs at import time — the Streamlit app refuses to start without a token.
hf_token = st.secrets.get("HF_TOKEN")
if not hf_token:
    st.error("HF_TOKEN not found in secrets. Please add it.")
    st.stop()  # halts script execution here; nothing below runs

# Shared client used by query_agent_from_csv below.
client = InferenceClient(token=hf_token)
14
def query_agent_from_csv(file_bytes, user_query, model_repo="mistralai/Mistral-7B-Instruct-v0.3", hf_client=None):
    """
    Read a CSV file and answer a question about it via a Hugging Face chat model.

    Parameters:
        file_bytes: binary file-like object holding the CSV data
            (e.g. a Streamlit upload).
        user_query: natural-language question about the dataset.
        model_repo: Hugging Face repo id of the chat model to query.
        hf_client: optional InferenceClient-compatible object exposing
            ``chat_completion``; defaults to the module-level ``client``.
            (Allows dependency injection for testing.)

    Returns:
        (answer, error) tuple of strings: on success ``error`` is "",
        on failure ``answer`` is "" and ``error`` describes the problem.
    """
    if hf_client is None:
        hf_client = client  # module-level InferenceClient

    try:
        # --- Step 1: Load CSV (rewind and retry latin1 for non-UTF-8 files) ---
        try:
            df = pd.read_csv(file_bytes)
        except Exception:
            file_bytes.seek(0)
            df = pd.read_csv(file_bytes, encoding="latin1")

        # Limit columns to avoid huge inputs
        MAX_COLS = 50
        if df.shape[1] > MAX_COLS:
            df = df.iloc[:, :MAX_COLS]

        # --- Step 2: Summarize dataset for model context ---
        summary = f"The dataset has {df.shape[0]} rows and {df.shape[1]} columns.\n"
        summary += "Columns: " + ", ".join(df.columns[:10])
        if df.shape[1] > 10:
            summary += ", ..."

        # --- Step 3: Build messages for chat API ---
        messages = [
            {"role": "system", "content": (
                "You are a professional data analyst. "
                "Analyze the CSV dataset and answer questions clearly with bullet points or tables if helpful."
            )},
            {"role": "user", "content": f"Dataset summary:\n{summary}"},
            {"role": "user", "content": f"Question: {user_query}"}
        ]

        # --- Step 4: Query the model ---
        response = hf_client.chat_completion(
            model=model_repo,
            messages=messages,
            max_tokens=512
        )

        content = response.choices[0].message["content"]
        # Guard: an empty/null completion must not crash on .strip() —
        # previously that AttributeError was misreported as a model error.
        answer = (content or "").strip()
        return answer, ""

    except Exception as e:
        # Boundary catch-all: surface failures as (.., error) instead of raising,
        # so the Streamlit UI can display them.
        return "", f"Error querying the model: {e}"