import csv
import io
import os

import pandas as pd
from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceEndpoint
# NOTE: create_pandas_dataframe_agent lives in langchain_experimental,
# not langchain_community (it executes model-generated code).
from langchain_experimental.agents import create_pandas_dataframe_agent

# Load environment variables (e.g. HUGGINGFACEHUB_API_TOKEN) from a .env file.
load_dotenv()

# --- Hugging Face Model Configuration ---
HF_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"


def detect_delimiter(file_content: bytes) -> str:
    """Detect the delimiter of a CSV file's content.

    Args:
        file_content: Raw bytes of the uploaded file (assumed UTF-8).

    Returns:
        The sniffed delimiter character, or ',' when the content is
        empty or sniffing/decoding fails.
    """
    try:
        # Decode only the first few lines to sample the content; a short
        # sample is sufficient for the Sniffer and keeps this cheap.
        sample = file_content.decode('utf-8').splitlines()[:5]
        if not sample:
            return ','  # Default to comma if empty
        # Use csv.Sniffer to guess the dialect (and thus the delimiter).
        dialect = csv.Sniffer().sniff('\n'.join(sample))
        return dialect.delimiter
    except Exception:
        # Fallback to a comma if sniffing (or decoding) fails.
        return ','


def query_agent(uploaded_file_content: bytes, query: str, hf_api_token: str) -> str:
    """Answer a natural-language question about a CSV via a Pandas agent.

    Initializes a LangChain Pandas DataFrame agent backed by a Hugging
    Face LLM and runs the given query against the uploaded CSV data.

    Args:
        uploaded_file_content: The byte content of the uploaded CSV file.
        query: The natural language question from the user.
        hf_api_token: The API token for the Hugging Face Hub.

    Returns:
        The agent's response string, or an "Error: ..." /
        "An error occurred ..." message on failure (this function never
        raises; all exceptions are reported in the return value).
    """
    if not hf_api_token:
        # Fail fast with a configuration error rather than a cryptic API error.
        return "Error: HUGGINGFACEHUB_API_TOKEN is not configured."

    try:
        # 1. Robustly read the CSV content using the detected delimiter.
        delimiter = detect_delimiter(uploaded_file_content)
        data_io = io.StringIO(uploaded_file_content.decode('utf-8'))
        df = pd.read_csv(data_io, sep=delimiter)

        # 2. Initialize the LLM using HuggingFaceEndpoint.
        # NOTE(review): some huggingface_hub versions require temperature
        # strictly > 0 — confirm 0.0 is accepted by the pinned version.
        llm = HuggingFaceEndpoint(
            repo_id=HF_REPO_ID,
            huggingfacehub_api_token=hf_api_token,
            temperature=0.0,  # Keep reasoning deterministic
            max_new_tokens=512,
        )

        # 3. Create the Pandas DataFrame Agent.
        # CRITICAL SECURITY NOTE: allow_dangerous_code=True is required —
        # LangChain demands an explicit opt-in because the agent executes
        # model-generated Python. Only run on trusted input.
        agent = create_pandas_dataframe_agent(
            llm,
            df,
            verbose=True,
            allow_dangerous_code=True,  # Opt-in; prevents runtime ValueError
            # System prompt guiding the agent's behavior.
            agent_kwargs={
                "system_message": (
                    "You are an expert data analysis assistant. You are interacting with a pandas DataFrame "
                    "named 'df'. Use Python code only to answer questions about the data. "
                    "Do not make up facts. Always show the code you executed before giving the final answer."
                )
            },
        )

        # 4. Run the query and return the agent's answer.
        response = agent.run(query)
        return response
    except Exception as e:
        # Boundary handler: surface any processing failure to the caller
        # as a readable message instead of propagating the exception.
        return f"An error occurred during analysis: {e}"