Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -4,22 +4,14 @@ from langchain_community.llms import HuggingFaceHub
|
|
| 4 |
from langchain_experimental.agents import create_pandas_dataframe_agent
|
| 5 |
from typing import Tuple
|
| 6 |
import requests
|
| 7 |
-
from io import BytesIO
|
| 8 |
|
| 9 |
def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
|
| 10 |
"""
|
| 11 |
-
Reads a CSV
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
- file_bytes: bytes of the uploaded CSV file (Streamlit provides)
|
| 15 |
-
- query: user's natural language question
|
| 16 |
-
- hf_token: huggingface token (string)
|
| 17 |
-
- repo_id: huggingface repo id for the model to use (e.g. 'mistralai/mistral-7b-Instruct-v0.1')
|
| 18 |
-
|
| 19 |
-
Returns: tuple (answer_text, debug_text)
|
| 20 |
"""
|
|
|
|
| 21 |
try:
|
| 22 |
-
# Read CSV — attempt common encodings and fallback
|
| 23 |
try:
|
| 24 |
df = pd.read_csv(file_bytes)
|
| 25 |
except Exception:
|
|
@@ -28,28 +20,26 @@ def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "
|
|
| 28 |
except Exception as e:
|
| 29 |
return f"Error reading CSV: {e}", ""
|
| 30 |
|
| 31 |
-
# Limit columns if dataset is extremely wide
|
| 32 |
MAX_COLS = 200
|
| 33 |
if df.shape[1] > MAX_COLS:
|
| 34 |
df = df.iloc[:, :MAX_COLS]
|
| 35 |
|
| 36 |
-
# ---
|
| 37 |
-
# This is to isolate the token/API error
|
| 38 |
try:
|
| 39 |
test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
|
| 40 |
test_headers = {"Authorization": f"Bearer {hf_token}"}
|
| 41 |
-
test_payload = {"inputs": "
|
| 42 |
response = requests.post(test_url, headers=test_headers, json=test_payload)
|
|
|
|
| 43 |
if response.status_code == 403:
|
| 44 |
-
|
| 45 |
response.raise_for_status()
|
| 46 |
except requests.exceptions.HTTPError as e:
|
| 47 |
-
return "", f"
|
| 48 |
except Exception as e:
|
| 49 |
-
return "", f"
|
| 50 |
-
# --- END TEMPORARY TEST BLOCK ---
|
| 51 |
|
| 52 |
-
# Build the LLM
|
| 53 |
try:
|
| 54 |
llm = HuggingFaceHub(
|
| 55 |
repo_id=repo_id,
|
|
@@ -59,13 +49,13 @@ def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "
|
|
| 59 |
except Exception as e:
|
| 60 |
return "", f"Error creating HuggingFaceHub LLM: {e}"
|
| 61 |
|
| 62 |
-
# Create pandas agent
|
| 63 |
try:
|
| 64 |
agent = create_pandas_dataframe_agent(llm, df, verbose=False)
|
| 65 |
except Exception as e:
|
| 66 |
return "", f"Error creating LangChain pandas agent: {e}"
|
| 67 |
|
| 68 |
-
#
|
| 69 |
try:
|
| 70 |
answer = agent.run(query)
|
| 71 |
except Exception as e:
|
|
|
|
| 4 |
from langchain_experimental.agents import create_pandas_dataframe_agent
|
| 5 |
from typing import Tuple
|
| 6 |
import requests
|
|
|
|
| 7 |
|
| 8 |
def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
|
| 9 |
"""
|
| 10 |
+
Reads a CSV, checks Hugging Face model access, builds a LangChain agent,
|
| 11 |
+
and returns (answer, debug_info).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
+
# --- Step 1: Read CSV ---
|
| 14 |
try:
|
|
|
|
| 15 |
try:
|
| 16 |
df = pd.read_csv(file_bytes)
|
| 17 |
except Exception:
|
|
|
|
| 20 |
except Exception as e:
|
| 21 |
return f"Error reading CSV: {e}", ""
|
| 22 |
|
|
|
|
| 23 |
MAX_COLS = 200
|
| 24 |
if df.shape[1] > MAX_COLS:
|
| 25 |
df = df.iloc[:, :MAX_COLS]
|
| 26 |
|
| 27 |
+
# --- Step 2: Test Hugging Face token/model access ---
|
|
|
|
| 28 |
try:
|
| 29 |
test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
|
| 30 |
test_headers = {"Authorization": f"Bearer {hf_token}"}
|
| 31 |
+
test_payload = {"inputs": "Hello"}
|
| 32 |
response = requests.post(test_url, headers=test_headers, json=test_payload)
|
| 33 |
+
|
| 34 |
if response.status_code == 403:
|
| 35 |
+
return "", f"Access Denied (403): Your token does not have permission to use {repo_id}."
|
| 36 |
response.raise_for_status()
|
| 37 |
except requests.exceptions.HTTPError as e:
|
| 38 |
+
return "", f"HTTP Error while accessing {repo_id}: {e}"
|
| 39 |
except Exception as e:
|
| 40 |
+
return "", f"Generic connection failure: {e}"
|
|
|
|
| 41 |
|
| 42 |
+
# --- Step 3: Build the LLM ---
|
| 43 |
try:
|
| 44 |
llm = HuggingFaceHub(
|
| 45 |
repo_id=repo_id,
|
|
|
|
| 49 |
except Exception as e:
|
| 50 |
return "", f"Error creating HuggingFaceHub LLM: {e}"
|
| 51 |
|
| 52 |
+
# --- Step 4: Create pandas agent ---
|
| 53 |
try:
|
| 54 |
agent = create_pandas_dataframe_agent(llm, df, verbose=False)
|
| 55 |
except Exception as e:
|
| 56 |
return "", f"Error creating LangChain pandas agent: {e}"
|
| 57 |
|
| 58 |
+
# --- Step 5: Run query ---
|
| 59 |
try:
|
| 60 |
answer = agent.run(query)
|
| 61 |
except Exception as e:
|