Spaces:
Sleeping
Sleeping
| from huggingface_hub import InferenceClient | |
| import os | |
| import pandas as pd | |
def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
    """Render a DataFrame as plain text suitable for embedding in a prompt.

    The text lists the column labels, then prints every row when the frame
    has at most ``max_rows`` rows, or a seeded random sample of ``max_rows``
    rows otherwise.

    Args:
        df: Frame to summarize.
        max_rows: Largest number of rows to print.

    Returns:
        The column list, a header line saying whether the rows are complete
        or sampled, and the rows as formatted by
        ``DataFrame.to_string(index=False)``.
    """
    # Column labels are not guaranteed to be strings (integer and datetime
    # labels are common), and str.join rejects non-string items, so each
    # label is coerced before joining.
    column_names = ", ".join(str(col) for col in df.columns)

    if len(df) > max_rows:
        # A fixed seed keeps the sample identical across calls.
        sample = df.sample(max_rows, random_state=42)
        header = "Showing a random sample of rows:"
    else:
        sample = df
        header = "Showing all rows:"

    return f"Columns: {column_names}\n\n{header}\n{sample.to_string(index=False)}"
def query_agent(df: pd.DataFrame, query: str) -> str:
    """Answer a natural-language question about ``df``.

    Questions containing "most common" or "most frequent" that mention one or
    more column names are answered directly with pandas. Any other question,
    or a direct analysis that raises, is forwarded to the
    ``google/gemma-2b-it`` model through ``InferenceClient`` together with a
    text summary of the data.

    Args:
        df: Dataset the question refers to.
        query: The user's question.

    Returns:
        The answer text, or a fixed apology message when the model call
        fails.
    """
    query_lower = query.lower()

    # ----------------- Direct Analysis for Most Common -----------------
    try:
        if "most common" in query_lower or "most frequent" in query_lower:
            # Columns are detected by case-insensitive substring search in
            # the query. Labels are coerced with str() because they may be
            # ints or other non-string objects, and empty labels are skipped
            # because "" is a substring of every query.
            cols_in_query = [
                col
                for col in df.columns
                if str(col) and str(col).lower() in query_lower
            ]
            if len(cols_in_query) == 1:
                col = cols_in_query[0]
                value = df[col].mode()[0]
                return f"The most common value in column '{col}' is '{value}'."
            elif len(cols_in_query) > 1:
                # Compute the most common combination of values across the
                # mentioned columns by treating each row as one tuple.
                combo_series = df[cols_in_query].apply(lambda row: tuple(row), axis=1)
                most_common_combo = combo_series.mode()[0]
                combo_str = ", ".join(
                    f"{col}={val}"
                    for col, val in zip(cols_in_query, most_common_combo)
                )
                return f"The most common combination of values is: {combo_str}"
    except Exception as e:
        # Deliberate best effort: any failure here (e.g. an empty frame with
        # no mode) falls through to the model-based answer below.
        print("Direct analysis failed:", e)

    # ----------------- Use LLM if direct analysis fails -----------------
    data_text = summarize_dataframe(df)
    prompt = f"""
You are a data analysis assistant with expertise in statistics and data interpretation.
Analyze the dataset sample below and answer the user's question in a clear, detailed, and well-explained way.
Include both the direct answer and a short explanation or reasoning behind it.
Dataset Summary:
{data_text}
Question:
{query}
Answer (with explanation):
"""

    try:
        # Client construction is inside the guarded region so that a
        # constructor failure also yields the friendly fallback message
        # instead of an unhandled exception.
        client = InferenceClient(
            model="google/gemma-2b-it",
            provider="hf-inference",
            token=os.environ.get("HUGGINGFACE_API_TOKEN"),
        )
        response = client.text_generation(
            prompt,
            max_new_tokens=1024,
            temperature=0.7,
        )
    except Exception as e:
        print("Model call failed:", e)
        return "⚠️ Sorry, the model could not generate an answer. Please try again."

    # Extract text safely: accept a plain string, a dict, or a non-empty list
    # of dicts carrying "generated_text"; anything else is stringified.
    if isinstance(response, str):
        return response
    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]
    if (
        isinstance(response, list)
        and len(response) > 0
        and isinstance(response[0], dict)
        and "generated_text" in response[0]
    ):
        return response[0]["generated_text"]
    return str(response)