# Spaces: Sleeping
# System prompt for the analysis model. It requires JSON-only replies, says
# when a "plot" object must be present or null, shows the five accepted
# response shapes, and lists the supported chart types.
ENHANCED_SYSTEM_PROMPT = """You are an advanced data analysis assistant. Respond ONLY in valid JSON format.
CHART CREATION RULES:
- For visualization requests (chart, graph, plot, visualize): Always include "plot" object
- For informational queries (explain, describe, what is, count): Set "plot": null
- For statistical analysis without charts: Set "plot": null
RESPONSE FORMATS:
1. INFORMATIONAL (no visualization):
{
"type": "explain",
"operations": [],
"plot": null,
"narrative": "detailed answer",
"insights_needed": false
}
2. STATISTICAL DESCRIPTION:
{
"type": "describe",
"operations": [{"op": "describe", "columns": ["col1", "col2"]}],
"plot": null,
"narrative": "statistical summary",
"insights_needed": false
}
3. VISUALIZATION REQUEST:
{
"type": "analysis",
"operations": [
{"op": "groupby", "columns": ["category"], "agg_col": "value", "agg_func": "sum"}
],
"plot": {
"type": "bar|line|pie|hist|scatter",
"x": "category",
"y": "sum_value",
"title": "Chart Title"
},
"narrative": "brief explanation",
"insights_needed": true
}
4. FILTERING:
{
"type": "analysis",
"operations": [{"op": "filter", "expr": "Age > 25"}],
"plot": null,
"narrative": "filtered data explanation",
"insights_needed": false
}
5. CALCULATIONS:
{
"type": "analysis",
"operations": [{"op": "calculate", "expr": "Col1 * Col2", "new_col": "Product"}],
"plot": null,
"narrative": "calculation explanation",
"insights_needed": false
}
CHART TYPES:
- "bar": For categorical comparisons
- "line": For trends over time/sequence
- "pie": For proportions/percentages
- "hist": For distributions
- "scatter": For correlations
Always ensure column names exist in the dataset before referencing them.
"""
# System prompt used for the follow-up "insights" request.
INSIGHTS_SYSTEM_PROMPT = "You are a data insights expert."

# Example questions about a dataset.
SAMPLE_QUESTIONS = [
    "What are the key patterns in this dataset?",
    "Explain the data structure and what insights can be derived",
    "What are the most important findings from this data?",
    "Compare the relationships between different columns",
    "Which columns have the strongest influence on the data?",
    "What trends can you identify in the dataset?",
    "Generate insights about data quality and completeness",
    "What recommendations would you make based on this data?",
    "Identify any anomalies or outliers in the dataset",
]
def get_chart_prompt(question, columns, data_sample):
    """Build the user prompt that asks the model for a JSON analysis plan.

    Args:
        question: The user's question, inserted verbatim.
        columns: Iterable of column labels. Each label is converted with
            ``str`` so non-string labels (e.g. integers) do not make the
            join fail.
        data_sample: Pre-rendered sample of the data, inserted verbatim.

    Returns:
        The formatted prompt string.
    """
    # str.join only accepts strings, so normalise the labels first.
    column_list = ', '.join(str(column) for column in columns)
    return f"""
Question: {question}
Available Columns: {column_list}
Sample Data:
{data_sample}
Create a JSON response following the format rules. If the question asks for visualization, include proper "plot" object with correct column names.
"""
# Substrings that suggest a column suits the categorical (x) axis.
_X_AXIS_KEYWORDS = ('name', 'category', 'type', 'group')
# Substrings that suggest a column suits the numeric (y) axis.
_Y_AXIS_KEYWORDS = ('value', 'amount', 'count', 'price', 'sales')


def _resolve_column(requested, available_columns, keywords):
    """Return the best available stand-in for *requested*, or None if none fits."""
    wanted = str(requested).strip().lower()
    # Prefer a column that differs from the request only by case/whitespace.
    for col in available_columns:
        if str(col).strip().lower() == wanted:
            return col
    # Otherwise fall back to the first column whose label contains a keyword.
    for col in available_columns:
        label = str(col).lower()  # str() so non-string labels do not crash
        if any(keyword in label for keyword in keywords):
            return col
    return None


def validate_plot_spec(plot_spec, available_columns):
    """Repair ``x``/``y`` column references in a plot spec.

    An axis whose column is missing from ``available_columns`` is replaced,
    in order of preference, by a column matching it case-insensitively or by
    the first column whose label contains an axis keyword. If nothing
    matches, the axis is left untouched.

    Args:
        plot_spec: Dict-like plot description with optional ``'x'`` and
            ``'y'`` keys, or a falsy value (returned unchanged).
        available_columns: Column labels that may be referenced.

    Returns:
        The same ``plot_spec`` object, modified in place where a replacement
        was found.
    """
    if not plot_spec:
        return plot_spec

    for axis, keywords in (('x', _X_AXIS_KEYWORDS), ('y', _Y_AXIS_KEYWORDS)):
        requested = plot_spec.get(axis)
        if not requested or requested in available_columns:
            continue
        replacement = _resolve_column(requested, available_columns, keywords)
        if replacement is not None:
            plot_spec[axis] = replacement
    return plot_spec
def get_insights_prompt(context_parts, narrative):
    """Build the prompt that asks for bullet-point insights on an analysis.

    Args:
        context_parts: Iterable of context fragments, joined one per line.
            Each fragment is converted with ``str`` so non-string items do
            not make the join fail.
        narrative: Text inserted under "Original Question Context".

    Returns:
        The formatted prompt string.
    """
    # str.join only accepts strings, so normalise the fragments first.
    insights_context = "\n".join(str(part) for part in context_parts)
    return f"""Based on this analysis, provide 4-6 detailed bullet points explaining key insights, patterns, and findings.
Analysis Context:
{insights_context}
Original Question Context:
{narrative}
Provide insights as bullet points."""