charesz committed on
Commit
e53c1ba
·
verified ·
1 Parent(s): 2cb7db8

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +55 -0
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ import pandas as pd
3
+ from langchain.llms import HuggingFaceHub
4
+ from langchain.agents import create_pandas_dataframe_agent
5
+ from typing import Tuple
6
+
7
def query_agent_from_csv(
    file_bytes,
    query: str,
    hf_token: str,
    repo_id: str = "mistralai/mistral-7b-instruct",
) -> Tuple[str, str]:
    """
    Answer a natural-language question about a CSV using a LangChain pandas agent.

    The CSV is loaded into a DataFrame, a HuggingFaceHub LLM wrapper is built,
    a pandas-dataframe agent is created around both, and the query is run.

    Args:
        file_bytes: The CSV data. May be raw ``bytes``/``bytearray``/``memoryview``
            or any object ``pandas.read_csv`` accepts (e.g. a file-like upload
            object). Seekable streams are rewound to the start before reading.
        query: The user's natural-language question.
        hf_token: Hugging Face API token passed to ``HuggingFaceHub``.
        repo_id: Hugging Face repo id of the model to use.

    Returns:
        A tuple ``(answer_text, debug_text)``:
        - on success: ``(answer, "")``;
        - if the CSV cannot be read: ``("Error reading CSV: ...", "")`` -- the
          message is deliberately kept in the first slot, as callers of the
          earlier version of this function received it there;
        - for every other failure (empty query, LLM construction, agent
          construction, agent run): ``("", "<error description>")``.

    This function does not raise for the failures above; they are reported
    through the returned tuple.
    """
    import io  # local import: keeps the module's top-level import block untouched

    def _rewind(stream) -> None:
        # Move a seekable stream back to its start. Objects without ``seek``
        # (e.g. a path string) and non-seekable streams are left alone.
        seek = getattr(stream, "seek", None)
        if callable(seek):
            try:
                seek(0)
            except (OSError, ValueError):
                pass

    # Reject blank questions before doing any parsing or remote calls.
    if not query or not query.strip():
        return "", "Error: query is empty."

    # Raw bytes have no ``seek`` and are not accepted by ``pd.read_csv``;
    # wrap them so they behave like an uploaded file object.
    if isinstance(file_bytes, (bytes, bytearray, memoryview)):
        file_bytes = io.BytesIO(file_bytes)

    try:
        # The caller may already have consumed the stream (e.g. for a preview).
        # Rewinding first ensures the default (UTF-8) decode is really attempted
        # instead of failing on an empty read and falling through to latin1.
        _rewind(file_bytes)
        try:
            df = pd.read_csv(file_bytes)
        except UnicodeDecodeError:
            # Only an encoding failure can be fixed by retrying with latin1.
            _rewind(file_bytes)
            df = pd.read_csv(file_bytes, encoding="latin1")
    except Exception as e:
        return f"Error reading CSV: {e}", ""

    # Small safety: if the dataset is extremely wide, keep only the leading columns.
    MAX_COLS = 200
    if df.shape[1] > MAX_COLS:
        df = df.iloc[:, :MAX_COLS]

    # Build the LLM wrapper for Hugging Face Hub.
    try:
        llm = HuggingFaceHub(
            repo_id=repo_id,
            huggingfacehub_api_token=hf_token,
            model_kwargs={"temperature": 0.0, "max_new_tokens": 512},
        )
    except Exception as e:
        return "", f"Error creating HuggingFaceHub LLM: {e}"

    # Create the pandas agent around the LLM and the loaded DataFrame.
    try:
        agent = create_pandas_dataframe_agent(llm, df, verbose=False)
    except Exception as e:
        return "", f"Error creating LangChain pandas agent: {e}"

    # Run the query; agent failures are reported via the debug slot.
    try:
        answer = agent.run(query)
    except Exception as e:
        return "", f"Agent runtime error: {e}"

    return answer, ""