charesz committed on
Commit
c39a167
·
verified ·
1 Parent(s): 48cc861

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +46 -51
utils.py CHANGED
@@ -1,64 +1,59 @@
1
  # utils.py
2
  import pandas as pd
3
- from langchain_community.llms import HuggingFaceHub
4
- from langchain_experimental.agents import create_pandas_dataframe_agent
5
- from typing import Tuple
6
- import requests
7
 
8
def query_agent_from_csv(file_bytes, query: str, hf_token: str, repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2") -> Tuple[str, str]:
    """
    Reads a CSV, checks Hugging Face model access, builds a LangChain agent,
    and returns (answer, debug_info).

    Parameters:
        file_bytes: binary file-like object holding the CSV data.
        query: natural-language question to run against the DataFrame.
        hf_token: Hugging Face API token used for both the access probe
            and the HuggingFaceHub LLM.
        repo_id: model repository id on the HF Inference API.

    Returns:
        (answer, debug_info) tuple. On success debug_info is "";
        on most failures answer is "" and debug_info explains the error.
    """
    # --- Step 1: Read CSV ---
    try:
        try:
            df = pd.read_csv(file_bytes)
        except Exception:
            # First parse failed (commonly a non-UTF-8 file):
            # rewind the buffer and retry with latin1.
            file_bytes.seek(0)
            df = pd.read_csv(file_bytes, encoding="latin1")
    except Exception as e:
        # NOTE(review): this failure path puts the message in the FIRST tuple
        # slot, unlike every other error below which uses the second slot —
        # confirm callers expect this asymmetry.
        return f"Error reading CSV: {e}", ""

    # Cap the column count so the agent prompt stays a manageable size.
    MAX_COLS = 200
    if df.shape[1] > MAX_COLS:
        df = df.iloc[:, :MAX_COLS]

    # --- Step 2: Test Hugging Face token/model access ---
    # Cheap probe request so auth/permission problems surface before the
    # heavier agent machinery is constructed.
    try:
        test_url = f"https://api-inference.huggingface.co/models/{repo_id}"
        test_headers = {"Authorization": f"Bearer {hf_token}"}
        test_payload = {"inputs": "Hello"}
        response = requests.post(test_url, headers=test_headers, json=test_payload)

        if response.status_code == 403:
            return "", f"Access Denied (403): Your token does not have permission to use {repo_id}."
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        return "", f"HTTP Error while accessing {repo_id}: {e}"
    except Exception as e:
        return "", f"Generic connection failure: {e}"

    # --- Step 3: Build the LLM ---
    try:
        llm = HuggingFaceHub(
            repo_id=repo_id,
            huggingfacehub_api_token=hf_token,
            # temperature 0.0 for deterministic answers; 512-token cap on output
            model_kwargs={"temperature": 0.0, "max_new_tokens": 512},
        )
    except Exception as e:
        return "", f"Error creating HuggingFaceHub LLM: {e}"

    # --- Step 4: Create pandas agent ---
    try:
        agent = create_pandas_dataframe_agent(llm, df, verbose=False)
    except Exception as e:
        return "", f"Error creating LangChain pandas agent: {e}"

    # --- Step 5: Run query ---
    try:
        answer = agent.run(query)
    except Exception as e:
        return "", f"Agent runtime error: {e}"

    return answer, ""
 
1
  # utils.py
2
  import pandas as pd
3
+ import streamlit as st
4
+ from huggingface_hub import InferenceClient
 
 
5
 
6
# Initialize Hugging Face Inference client using the secret
# NOTE: runs at import time — the Streamlit app refuses to start without a token.
hf_token = st.secrets.get("HF_TOKEN")
if not hf_token:
    st.error("HF_TOKEN not found in secrets. Please add it.")
    st.stop()  # halts script execution here; nothing below runs

# Shared client used by query_agent_from_csv below.
client = InferenceClient(token=hf_token)
14
def query_agent_from_csv(file_bytes, user_query, model_repo="mistralai/Mistral-7B-Instruct-v0.3", hf_client=None):
    """
    Read a CSV file and answer a question about it via a Hugging Face chat model.

    Parameters:
        file_bytes: binary file-like object holding the CSV data
            (e.g. a Streamlit upload).
        user_query: natural-language question about the dataset.
        model_repo: Hugging Face repo id of the chat model to query.
        hf_client: optional InferenceClient-compatible object exposing
            ``chat_completion``; defaults to the module-level ``client``.
            (Allows dependency injection for testing.)

    Returns:
        (answer, error) tuple of strings: on success ``error`` is "",
        on failure ``answer`` is "" and ``error`` describes the problem.
    """
    if hf_client is None:
        hf_client = client  # module-level InferenceClient

    try:
        # --- Step 1: Load CSV (rewind and retry latin1 for non-UTF-8 files) ---
        try:
            df = pd.read_csv(file_bytes)
        except Exception:
            file_bytes.seek(0)
            df = pd.read_csv(file_bytes, encoding="latin1")

        # Limit columns to avoid huge inputs
        MAX_COLS = 50
        if df.shape[1] > MAX_COLS:
            df = df.iloc[:, :MAX_COLS]

        # --- Step 2: Summarize dataset for model context ---
        summary = f"The dataset has {df.shape[0]} rows and {df.shape[1]} columns.\n"
        summary += "Columns: " + ", ".join(df.columns[:10])
        if df.shape[1] > 10:
            summary += ", ..."

        # --- Step 3: Build messages for chat API ---
        messages = [
            {"role": "system", "content": (
                "You are a professional data analyst. "
                "Analyze the CSV dataset and answer questions clearly with bullet points or tables if helpful."
            )},
            {"role": "user", "content": f"Dataset summary:\n{summary}"},
            {"role": "user", "content": f"Question: {user_query}"}
        ]

        # --- Step 4: Query the model ---
        response = hf_client.chat_completion(
            model=model_repo,
            messages=messages,
            max_tokens=512
        )

        content = response.choices[0].message["content"]
        # Guard: an empty/null completion must not crash on .strip() —
        # previously that AttributeError was misreported as a model error.
        answer = (content or "").strip()
        return answer, ""

    except Exception as e:
        # Boundary catch-all: surface failures as (.., error) instead of raising,
        # so the Streamlit UI can display them.
        return "", f"Error querying the model: {e}"