| | from langchain_core.prompts import ChatPromptTemplate |
| | import numpy as np |
| |
|
# Substrings that process_answer() looks for (case-insensitively, via
# contains_keywords) in the "answer" / "error" fields of a response dict.
keywords = [
    "i encountered",
    "429",
    "unfortunately",
    "unexpected character",
    "unsupported",
    "error",
    "sorry",
    "response",
    "unable",
    "because",
    "too many",
]
| |
|
def contains_keywords(text, keywords):
    """Return True if any keyword occurs in *text*, ignoring case.

    Args:
        text: The string to search. Any non-string value (for example
            ``None``) is treated as containing no keywords.
        keywords: Iterable of keyword strings, matched as substrings.

    Returns:
        bool: True when at least one keyword is a case-insensitive
        substring of *text*, otherwise False.
    """
    if not isinstance(text, str):
        # Values taken out of response dicts are not guaranteed to be text;
        # calling .lower() on them would raise AttributeError.
        return False

    # Lower-case the haystack once instead of once per keyword.
    haystack = text.lower()
    return any(keyword.lower() in haystack for keyword in keywords)
| |
|
def process_answer(answer):
    """Check a response object for signs of failure.

    Args:
        answer: The response to inspect. Lists and dicts are examined;
            every other type is reported as not problematic.

    Returns:
        The string ``"Empty response received."`` when *answer* is an empty
        list; ``True`` when *answer* is a dict whose ``"answer"`` or
        ``"error"`` text contains one of the module-level ``keywords``;
        ``False`` otherwise.
    """
    if isinstance(answer, list) and not answer:
        return "Empty response received."

    if isinstance(answer, dict):
        for field in ("answer", "error"):
            value = answer.get(field)
            # Only textual values can be scanned; a missing key, None or any
            # other non-string payload previously crashed the keyword check.
            if isinstance(value, str) and contains_keywords(value, keywords):
                return True

    return False
| |
|
| |
|
def _prompt_generator(question: str, chart_required: bool, csv_url: str):
    """Build the prompt template for a question about a CSV file.

    Args:
        question: Text inserted after the ``**Query:**`` marker.
        chart_required: When truthy the chart-generation instructions are
            used; otherwise the text-analysis instructions are used.
        csv_url: Value inserted into the ``pd.read_csv(...)`` instruction.

    Returns:
        The result of ``ChatPromptTemplate.from_template`` applied to the
        fully interpolated instruction text.
    """
    # NOTE(review): the text is interpolated by the f-string first and only
    # then handed to from_template(), so any single braces left in it (the
    # "{unique_id}" produced by "{{unique_id}}", or braces inside `question`)
    # are presumably parsed as template variables - confirm against callers.
    # NOTE(review): csv_url is inserted without quotes inside pd.read_csv(...)
    # - confirm whether callers pass an already-quoted value.
    if chart_required:
        prompt_text = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:

1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
2. Visualization requirements:
- Adjust font sizes, rotate labels (45° if needed), truncate for readability
- Figure size: (12, 6)
- Descriptive titles (fontsize=14)
- Colorblind-friendly palettes
- Do not use any visualization library other than matplotlib or seaborn
3. File handling rules:
- Create MAXIMUM 2 charts if absolutely necessary
- For multiple charts:
* Arrange in grid format (2x1 vertical layout preferred)
* Use SAME unique_id with suffixes:
- f"{{unique_id}}_1.png"
- f"{{unique_id}}_2.png"
- Save EXCLUSIVELY to "generated_charts" folder
- File naming: f"chart_{{unique_id}}.png" (for single chart)
4. FINAL OUTPUT MUST BE:
- For single chart: f"generated_charts/chart_{{unique_id}}.png"
- For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
- ONLY return this full path string, nothing else

**Query:** {question}

IMPORTANT:
- Generate the unique_id FIRST before any operations
- Use THE SAME unique_id throughout entire process
- NEVER generate new UUIDs after initial creation
- Return EXACT filepath string of the final saved chart
- Always use pd.read_csv({csv_url}) to read the CSV file
"""
    else:
        prompt_text = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:

1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
3. **Communication:** Provide concise, professional, and well-structured responses.
4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
5. Always use pd.read_csv({csv_url}) to read the CSV file.
6. **Full Dataset Interpretation Rule:** When asked about the entire dataset, provide a concise summary of the dataset's structure, such as the number of rows, columns, and data types.
(Example: “give me total number of columns”, “show all statistics”, “full summary”, etc.)

**Query:** {question}

"""

    return ChatPromptTemplate.from_template(prompt_text)
| | |
| | |
| | |
def handle_out_of_range_float(value):
    """Replace non-finite floating-point values with plain stand-ins.

    Args:
        value: Any value. Only Python floats and NumPy floating scalars are
            inspected; everything else is returned untouched.

    Returns:
        ``None`` for NaN, ``"Infinity"`` for positive infinity,
        ``"-Infinity"`` for negative infinity, and *value* itself in every
        other case.
    """
    # np.floating also covers NumPy scalars such as np.float32, which are not
    # subclasses of the built-in float and would otherwise slip through.
    if isinstance(value, (float, np.floating)):
        if np.isnan(value):
            return None
        if np.isinf(value):
            # Keep the sign: previously -inf was reported as "Infinity" too.
            return "Infinity" if value > 0 else "-Infinity"
    return value