Spaces:

charesz
/

csv-redaer-bot

Sleeping

App Files Files Community

charesz committed on Oct 21, 2025

Commit

48cc861

verified ·

1 Parent(s): 99de058

Update utils.py

Browse files

Files changed (1) hide show

utils.py +12 -22

utils.py CHANGED Viewed

@@ -4,22 +4,14 @@ from langchain_community.llms import HuggingFaceHub
 from langchain_experimental.agents import create_pandas_dataframe_agent
 from typing import Tuple
 import requests
-from io import BytesIO
 def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
     """
-    Reads a CSV from file-like bytes, builds a LangChain pandas-agent with HuggingFaceHub LLM,
-    runs the user query and returns (answer, debug_info).
-    - file_bytes: bytes of the uploaded CSV file (Streamlit provides)
-    - query: user's natural language question
-    - hf_token: huggingface token (string)
-    - repo_id: huggingface repo id for the model to use (e.g. 'mistralai/mistral-7b-Instruct-v0.1')
-    Returns: tuple (answer_text, debug_text)
     """
     try:
-        # Read CSV — attempt common encodings and fallback
         try:
             df = pd.read_csv(file_bytes)
         except Exception:
@@ -28,28 +20,26 @@ def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "
     except Exception as e:
         return f"Error reading CSV: {e}", ""
-    # Limit columns if dataset is extremely wide
     MAX_COLS = 200
     if df.shape[1] > MAX_COLS:
         df = df.iloc[:, :MAX_COLS]
-    # --- TEMPORARY TEST BLOCK ---
-    # This is to isolate the token/API error
     try:
         test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
         test_headers = {"Authorization": f"Bearer {hf_token}"}
-        test_payload = {"inputs": "Test connection"}
         response = requests.post(test_url, headers=test_headers, json=test_payload)
         if response.status_code == 403:
-            print("TEMPORARY TEST FAILED: Received 403 status code from Hugging Face API.")
         response.raise_for_status()
     except requests.exceptions.HTTPError as e:
-        return "", f"Isolation Test Error: Failed to connect to Hugging Face model {repo_id}. HTTP Error: {e}"
     except Exception as e:
-        return "", f"Isolation Test Error: Generic connection failure: {e}"
-    # --- END TEMPORARY TEST BLOCK ---
-    # Build the LLM wrapper for Hugging Face Hub
     try:
         llm = HuggingFaceHub(
             repo_id=repo_id,
@@ -59,13 +49,13 @@ def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "
     except Exception as e:
         return "", f"Error creating HuggingFaceHub LLM: {e}"
-    # Create pandas agent
     try:
         agent = create_pandas_dataframe_agent(llm, df, verbose=False)
     except Exception as e:
         return "", f"Error creating LangChain pandas agent: {e}"
-    # Run query (wrap in try/except to capture agent errors)
     try:
         answer = agent.run(query)
     except Exception as e:

 from langchain_experimental.agents import create_pandas_dataframe_agent
 from typing import Tuple
 import requests
 def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
     """
+    Reads a CSV, checks Hugging Face model access, builds a LangChain agent,
+    and returns (answer, debug_info).
     """
+    # --- Step 1: Read CSV ---
     try:
         try:
             df = pd.read_csv(file_bytes)
         except Exception:
     except Exception as e:
         return f"Error reading CSV: {e}", ""
     MAX_COLS = 200
     if df.shape[1] > MAX_COLS:
         df = df.iloc[:, :MAX_COLS]
+    # --- Step 2: Test Hugging Face token/model access ---
     try:
         test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
         test_headers = {"Authorization": f"Bearer {hf_token}"}
+        test_payload = {"inputs": "Hello"}
         response = requests.post(test_url, headers=test_headers, json=test_payload)
         if response.status_code == 403:
+            return "", f"Access Denied (403): Your token does not have permission to use {repo_id}."
         response.raise_for_status()
     except requests.exceptions.HTTPError as e:
+        return "", f"HTTP Error while accessing {repo_id}: {e}"
     except Exception as e:
+        return "", f"Generic connection failure: {e}"
+    # --- Step 3: Build the LLM ---
     try:
         llm = HuggingFaceHub(
             repo_id=repo_id,
     except Exception as e:
         return "", f"Error creating HuggingFaceHub LLM: {e}"
+    # --- Step 4: Create pandas agent ---
     try:
         agent = create_pandas_dataframe_agent(llm, df, verbose=False)
     except Exception as e:
         return "", f"Error creating LangChain pandas agent: {e}"
+    # --- Step 5: Run query ---
     try:
         answer = agent.run(query)
     except Exception as e: