# NOTE: lines below were scraped from a Hugging Face Spaces page header
# ("Spaces: Sleeping"); kept only as a comment so the file stays valid Python.
"""CSV question-answering backend: pandas DataFrame agent driven by a Hugging Face LLM."""

# Standard library
import csv
import io
import os

# Third-party
import pandas as pd
from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceEndpoint
from langchain_experimental.agents import create_pandas_dataframe_agent

# Pull HUGGINGFACEHUB_API_TOKEN (and any other settings) from a local .env file.
load_dotenv()

# --- Hugging Face Model Configuration ---
HF_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"
def detect_delimiter(file_content: bytes) -> str:
    """Guess the field delimiter used by raw CSV bytes.

    Sniffs at most the first five lines of the decoded content with
    ``csv.Sniffer``. Falls back to a comma whenever the content is
    empty, not valid UTF-8, or the dialect cannot be determined.
    """
    # Decode and take a small sample; any decode failure means "just use a comma".
    try:
        lines = file_content.decode('utf-8').splitlines()[:5]
    except Exception:
        return ','

    if not lines:
        # Nothing to sniff — default to comma.
        return ','

    try:
        return csv.Sniffer().sniff('\n'.join(lines)).delimiter
    except Exception:
        # Sniffer raises csv.Error on ambiguous input; fall back to comma.
        return ','
def query_agent(uploaded_file_content: bytes, query: str, hf_api_token: str) -> str:
    """Answer a natural-language question about an uploaded CSV file.

    Builds a pandas DataFrame from the raw bytes (auto-detecting the
    delimiter), wires it to a Hugging Face LLM through a LangChain
    pandas agent, and returns the agent's answer. Failures are reported
    as error strings rather than raised.

    Args:
        uploaded_file_content: Raw byte content of the uploaded CSV file.
        query: The user's natural-language question.
        hf_api_token: API token for the Hugging Face Hub.

    Returns:
        The agent's textual response, or an error message string.
    """
    # Guard clause: no token means no point in going further.
    if not hf_api_token:
        return "Error: HUGGINGFACEHUB_API_TOKEN is not configured."

    try:
        # 1. Robustly parse the CSV using the sniffed delimiter.
        sep = detect_delimiter(uploaded_file_content)
        frame = pd.read_csv(
            io.StringIO(uploaded_file_content.decode('utf-8')),
            sep=sep,
        )

        # 2. Deterministic, bounded generation from the hosted endpoint.
        llm = HuggingFaceEndpoint(
            repo_id=HF_REPO_ID,
            huggingfacehub_api_token=hf_api_token,
            temperature=0.0,
            max_new_tokens=512,
        )

        # 3. Build the DataFrame agent.
        # CRITICAL SECURITY NOTE: allow_dangerous_code=True is the explicit
        # opt-in LangChain requires for agents that execute Python code.
        system_message = (
            "You are an expert data analysis assistant. You are interacting with a pandas DataFrame "
            "named 'df'. Use Python code only to answer questions about the data. "
            "Do not make up facts. Always show the code you executed before giving the final answer."
        )
        agent = create_pandas_dataframe_agent(
            llm,
            frame,
            verbose=True,
            allow_dangerous_code=True,
            agent_kwargs={"system_message": system_message},
        )

        # 4. Run the query and hand back the agent's answer.
        return agent.run(query)
    except Exception as e:
        # Surface any failure (parsing, network, agent) as a message.
        return f"An error occurred during analysis: {e}"