Data_Analyzer / prompts.py
Tamannathakur's picture
Update prompts.py
a8158b6 verified
raw
history blame
3.73 kB
# System prompt for the analysis request. It tells the model to answer only
# in JSON, states when the "plot" object must be present or null, and shows
# five example response shapes (informational, statistical description,
# visualization, filtering, calculation) followed by the five chart types.
# NOTE(review): the visualization example uses "sum_value" as the y column,
# which presumably mirrors how the groupby step names its aggregated output
# column - confirm against the code that executes "operations".
ENHANCED_SYSTEM_PROMPT = """You are an advanced data analysis assistant. Respond ONLY in valid JSON format.
CHART CREATION RULES:
- For visualization requests (chart, graph, plot, visualize): Always include "plot" object
- For informational queries (explain, describe, what is, count): Set "plot": null
- For statistical analysis without charts: Set "plot": null
RESPONSE FORMATS:
1. INFORMATIONAL (no visualization):
{
"type": "explain",
"operations": [],
"plot": null,
"narrative": "detailed answer",
"insights_needed": false
}
2. STATISTICAL DESCRIPTION:
{
"type": "describe",
"operations": [{"op": "describe", "columns": ["col1", "col2"]}],
"plot": null,
"narrative": "statistical summary",
"insights_needed": false
}
3. VISUALIZATION REQUEST:
{
"type": "analysis",
"operations": [
{"op": "groupby", "columns": ["category"], "agg_col": "value", "agg_func": "sum"}
],
"plot": {
"type": "bar|line|pie|hist|scatter",
"x": "category",
"y": "sum_value",
"title": "Chart Title"
},
"narrative": "brief explanation",
"insights_needed": true
}
4. FILTERING:
{
"type": "analysis",
"operations": [{"op": "filter", "expr": "Age > 25"}],
"plot": null,
"narrative": "filtered data explanation",
"insights_needed": false
}
5. CALCULATIONS:
{
"type": "analysis",
"operations": [{"op": "calculate", "expr": "Col1 * Col2", "new_col": "Product"}],
"plot": null,
"narrative": "calculation explanation",
"insights_needed": false
}
CHART TYPES:
- "bar": For categorical comparisons
- "line": For trends over time/sequence
- "pie": For proportions/percentages
- "hist": For distributions
- "scatter": For correlations
Always ensure column names exist in the dataset before referencing them.
"""
# System prompt text for the insights step.
# NOTE(review): presumably sent alongside the prompt built by
# get_insights_prompt - confirm at the call site.
INSIGHTS_SYSTEM_PROMPT = "You are a data insights expert."

# Ready-made example questions about a dataset.
# NOTE(review): presumably offered to the user as suggestions in the UI -
# confirm where this list is consumed.
SAMPLE_QUESTIONS = [
    "What are the key patterns in this dataset?",
    "Explain the data structure and what insights can be derived",
    "What are the most important findings from this data?",
    "Compare the relationships between different columns",
    "Which columns have the strongest influence on the data?",
    "What trends can you identify in the dataset?",
    "Generate insights about data quality and completeness",
    "What recommendations would you make based on this data?",
    "Identify any anomalies or outliers in the dataset",
]
def get_chart_prompt(question, columns, data_sample):
    """Build the user prompt that asks for a JSON analysis/plot response.

    Args:
        question: The user's question; interpolated as-is.
        columns: Iterable of column labels. Each label is converted with
            ``str()`` before joining, so non-string labels (for example
            integer column labels) do not raise ``TypeError``.
        data_sample: Sample of the data; interpolated via its string form.

    Returns:
        The prompt text listing the question, the available columns and the
        data sample, followed by the formatting instruction.
    """
    # str.join only accepts strings, so coerce every label first.
    column_list = ', '.join(str(column) for column in columns)
    return f"""
Question: {question}
Available Columns: {column_list}
Sample Data:
{data_sample}
Create a JSON response following the format rules. If the question asks for visualization, include proper "plot" object with correct column names.
"""
def _resolve_column(requested, available_columns, keywords):
    """Return the best available substitute for *requested*, or ``None``.

    A case-insensitive exact match wins; otherwise the first available
    column whose lower-cased name contains one of *keywords* is used.
    """
    wanted = str(requested).lower()
    # A case-only mismatch ("category" vs "Category") is the most specific
    # evidence of what was meant, so check it before the keyword heuristic.
    for column in available_columns:
        if str(column).lower() == wanted:
            return column
    for column in available_columns:
        # str() keeps non-string labels (e.g. integers) from crashing .lower().
        label = str(column).lower()
        if any(keyword in label for keyword in keywords):
            return column
    return None


def validate_plot_spec(plot_spec, available_columns):
    """Repair ``x``/``y`` column references in a plot spec, best effort.

    For each axis whose value is set but is not one of *available_columns*,
    a replacement is looked up: first a case-insensitive match of the
    requested name, then the first column whose name contains an
    axis-specific keyword. If nothing matches, the original value is left
    untouched.

    Args:
        plot_spec: Mapping with optional ``'x'`` and ``'y'`` entries, or a
            falsy value (``None``, ``{}``), which is returned unchanged.
        available_columns: Collection of column labels that actually exist.

    Returns:
        The same *plot_spec* object, modified in place where a replacement
        was found.
    """
    if not plot_spec:
        return plot_spec

    axis_keywords = (
        ('x', ('name', 'category', 'type', 'group')),
        ('y', ('value', 'amount', 'count', 'price', 'sales')),
    )
    for axis, keywords in axis_keywords:
        requested = plot_spec.get(axis)
        # Nothing to fix when the axis is unset or already valid.
        if not requested or requested in available_columns:
            continue
        replacement = _resolve_column(requested, available_columns, keywords)
        if replacement is not None:
            plot_spec[axis] = replacement
    return plot_spec
def get_insights_prompt(context_parts, narrative):
    """Build the prompt that asks for bullet-point insights.

    Args:
        context_parts: Iterable of context fragments. Each fragment is
            converted with ``str()`` and the results are joined with
            newlines, so non-string fragments do not raise ``TypeError``.
        narrative: Text placed under "Original Question Context".

    Returns:
        The prompt text requesting 4-6 bullet points.
    """
    # str.join only accepts strings, so coerce every fragment first.
    insights_context = "\n".join(str(part) for part in context_parts)
    return f"""Based on this analysis, provide 4-6 detailed bullet points explaining key insights, patterns, and findings.
Analysis Context:
{insights_context}
Original Question Context:
{narrative}
Provide insights as bullet points."""