# Flask backend for Quantilytix: pandasai chat, automatic dataset analysis,
# and AI-generated business/marketing report endpoints.
# (Replaced Hugging Face Spaces page-status residue that was not source code.)
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| import pandas as pd | |
| import os | |
| import io | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS, cross_origin | |
| import logging | |
| from dotenv import load_dotenv | |
| from pandasai import SmartDatalake | |
| from pandasai import SmartDataframe | |
| from pandasai.responses.response_parser import ResponseParser | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains import LLMChain | |
| from datetime import datetime | |
| import matplotlib.pyplot as plt | |
| import google.generativeai as genai | |
| import uuid | |
| import base64 | |
| from io import BytesIO | |
| import json # <-- Added import | |
# Load environment variables (e.g. the 'Gemini' API key) from a local .env file.
load_dotenv()
app = Flask(__name__)
# Enable CORS for all routes so browser front-ends on other origins can call the API.
cors = CORS(app)
class FlaskResponse(ResponseParser):
    """pandasai response parser that renders results for a JSON/HTML API.

    DataFrames become HTML table strings, plots become base64-encoded
    ``data:image/png`` URIs, and anything else is stringified.
    """

    def __init__(self, context):
        super().__init__(context)

    def format_dataframe(self, result):
        """Render a DataFrame result as an HTML table string."""
        return result["value"].to_html()

    def format_plot(self, result):
        """Render a plot result as a base64 PNG data URI.

        Handles both a live matplotlib figure (anything exposing ``savefig``)
        and a chart image file path saved to disk by pandasai; falls back to
        ``str()`` for anything else.
        """
        val = result["value"]
        # Case 1: a matplotlib figure — serialize it to PNG in memory.
        if hasattr(val, "savefig"):
            try:
                buf = io.BytesIO()
                val.savefig(buf, format="png")
                buf.seek(0)
                return self._png_data_uri(buf.read())
            except Exception as e:
                print("Error processing figure:", e)
                return str(val)
        # Case 2: a path to an image file written by pandasai.
        # (The original wrapped val in a no-op os.path.join(val); removed.)
        if isinstance(val, str) and os.path.isfile(val):
            print("My image path:", val)
            with open(val, "rb") as file:
                return self._png_data_uri(file.read())
        # Fallback: return as a string.
        return str(val)

    @staticmethod
    def _png_data_uri(png_bytes):
        """Encode raw PNG bytes as a data URI suitable for an <img> src."""
        encoded = base64.b64encode(png_bytes).decode("utf-8")
        return f"data:image/png;base64,{encoded}"

    def format_other(self, result):
        """Render any non-dataframe, non-plot result as a plain string."""
        return str(result["value"])
# The API key is looked up twice: os.getenv (returns None if unset) for the
# LangChain wrapper, then os.environ (raises KeyError if unset) for genai.
# NOTE(review): the second lookup is redundant when the variable exists, but
# consolidating would change the failure mode on a missing key — confirm first.
gemini_api_key = os.getenv('Gemini')
# LangChain chat model used by the pandasai SmartDataframe agent in bot().
llm = ChatGoogleGenerativeAI(api_key=gemini_api_key, model='gemini-2.0-flash-thinking-exp', temperature=0.1)
gemini_api_key = os.environ['Gemini']
genai.configure(api_key=gemini_api_key)

# --- Model for TEXT/Markdown output ---
text_generation_config = {
    "temperature": 0.2,
    "top_p": 0.95,
    "max_output_tokens": 5000,
}
text_model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    generation_config=text_generation_config,
)

# --- Model for JSON output ---
json_generation_config = {
    "temperature": 0.2,
    "top_p": 0.95,
    "max_output_tokens": 8192,  # Increased for potentially large JSON
    "response_mime_type": "application/json",  # <-- This is the crucial part
}
# Ensure you use a model version that fully supports JSON output mode
# NOTE(review): json_model is configured but never used in this file —
# auto_analyze_dataset() uses text_model; confirm whether this is intentional.
json_model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    generation_config=json_generation_config,
)

# Chart export path for pandasai. The GUID is generated once at import time,
# so every request served by this process shares the same directory.
guid = uuid.uuid4()
new_filename = f"{guid}"
user_defined_path = os.path.join("/exports/charts", new_filename)
# Endpoint for chat
# NOTE(review): no @app.route decorator is visible here — confirm the route
# registration was not lost when this file was extracted.
def bot():
    """Answer a natural-language question about a caller-supplied table.

    Expects a JSON request body with:
      - json_table: the table data (already-parsed JSON or a JSON string)
      - user_question: the question to ask about the data

    Returns a JSON object {"answer": ...} where the answer is an HTML table,
    a base64 PNG data URI, or plain text, depending on the agent's result.
    """
    # Retrieve parameters from the request
    json_table = request.json.get("json_table")
    user_question = request.json.get("user_question")
    print("User question:", user_question)
    # Convert the table data into a dataframe.
    # SECURITY FIX: the original used eval(str(json_table)), which executes
    # arbitrary Python expressions supplied by the client. request.json is
    # already parsed, so use it directly; only string payloads need json.loads.
    data = json.loads(json_table) if isinstance(json_table, str) else json_table
    df = pd.DataFrame(data)
    print("Columns in dataframe:", list(df.columns))
    # Create a SmartDataframe instance using your configuration.
    pandas_agent = SmartDataframe(
        df,
        config={
            "llm": llm,
            "response_parser": FlaskResponse,
            "custom_whitelisted_dependencies": [
                "os",
                "io",
                "sys",
                "chr",
                "glob",
                "b64decoder",
                "collections",
                "geopy",
                "geopandas",
                "wordcloud",
                "builtins"
            ],
            "security": "none", "save_charts_path": user_defined_path, "save_charts": False, "enable_cache": False, "conversational": True
        }
    )
    # Get the answer from the agent
    answer = pandas_agent.chat(user_question)
    # Process the answer based on its type
    if isinstance(answer, pd.DataFrame):
        formatted_answer = answer.to_html()
    elif isinstance(answer, plt.Figure):
        # Serialize the figure to an in-memory PNG and base64-encode it.
        buf = io.BytesIO()
        answer.savefig(buf, format="png")
        buf.seek(0)
        image_base64 = base64.b64encode(buf.read()).decode("utf-8")
        formatted_answer = f"data:image/png;base64,{image_base64}"
    else:
        # Numbers, strings, and anything else are returned as plain text.
        formatted_answer = str(answer)
    # Return the formatted answer as JSON.
    return jsonify({"answer": formatted_answer})
# -------------------------------------------------------------------
# --- NEW AUTOMATIC ANALYSIS ENDPOINT ---
# -------------------------------------------------------------------
def _extract_json_candidate(response_text):
    """Best-effort extraction of a JSON object from raw model output.

    Strips markdown code fences and any prose before the first '{' or
    after the last '}' so the remainder can be handed to json.loads.
    """
    # Remove markdown code blocks
    if "```json" in response_text:
        response_text = response_text.split("```json", 1)[1]
        response_text = response_text.split("```", 1)[0]
    elif "```" in response_text:
        response_text = response_text.split("```", 1)[1]
        response_text = response_text.split("```", 1)[0]
    # Remove any leading/trailing whitespace
    response_text = response_text.strip()
    # Trim any extra text outside the outermost JSON object boundaries.
    if not response_text.startswith("{"):
        start_idx = response_text.find("{")
        if start_idx != -1:
            response_text = response_text[start_idx:]
    if not response_text.endswith("}"):
        end_idx = response_text.rfind("}")
        if end_idx != -1:
            response_text = response_text[:end_idx + 1]
    return response_text


def _validate_analysis_payload(output_data):
    """Raise ValueError unless output_data matches the expected schema.

    Expected shape: {"charts": [{"key", "chart_type", "data": [...]}, ...],
    "insights": "..."}.
    """
    if not isinstance(output_data, dict):
        raise ValueError("Response is not a JSON object.")
    if "charts" not in output_data:
        raise ValueError("Output JSON missing 'charts' key.")
    if "insights" not in output_data:
        raise ValueError("Output JSON missing 'insights' key.")
    if not isinstance(output_data["charts"], list):
        raise ValueError("'charts' must be an array.")
    # Validate each chart has required fields
    for idx, chart in enumerate(output_data["charts"]):
        if not isinstance(chart, dict):
            raise ValueError(f"Chart at index {idx} is not an object.")
        if "key" not in chart or "chart_type" not in chart or "data" not in chart:
            raise ValueError(f"Chart at index {idx} missing required fields (key, chart_type, data).")
        if not isinstance(chart["data"], list):
            raise ValueError(f"Chart '{chart.get('key', idx)}' data must be an array.")


def auto_analyze_dataset():
    """
    Analyzes a dataset without a user query, generating
    both chartable JSON data and text insights.

    Expects a JSON request body with "json_table" (the raw dataset).
    Returns the model's {"charts": [...], "insights": "..."} object on
    success, or a JSON error payload with status 400/500.
    NOTE(review): no @app.route decorator is visible — confirm registration.
    """
    json_table_str = request.json.get("json_table")
    if not json_table_str:
        return jsonify({"error": "No 'json_table' provided."}), 400
    # Enhanced prompt for reliable JSON output
    prompt = f"""You are an expert data analyst. Analyze the dataset below and respond with ONLY a valid JSON object.
CRITICAL: Your entire response must be ONLY the JSON object. No explanations, no markdown code fences, no additional text.
Required JSON structure:
{{
"charts": [
{{
"key": "descriptive_name_in_snake_case",
"chart_type": "line",
"data": [{{"x_label": "Category1", "y_value": 100}}, {{"x_label": "Category2", "y_value": 150}}]
}},
{{
"key": "another_chart_name",
"chart_type": "bar",
"data": [{{"category": "A", "value": 50}}, {{"category": "B", "value": 75}}]
}}
],
"insights": "Executive summary with key findings and actionable recommendations. Use markdown formatting for readability."
}}
Instructions:
1. Identify 2-4 key insights from the data (trends, patterns, outliers, correlations, distributions, significant variables)
2. For each insight, create a chart with aggregated data
3. Chart types: "line", "bar", "pie", "table", "heatmaps"
4. Data arrays should have consistent keys per chart
5. Write a compelling executive summary in the insights field with actionable recommendations.
Dataset to analyze:
{json_table_str}
Return ONLY the JSON object now:"""
    try:
        # Generate content using text_model (no response_mime_type needed)
        response = text_model.generate_content(prompt)
        response_text = response.text.strip()
        print(f"Raw AI response (first 200 chars): {response_text[:200]}")
        # Aggressive cleaning to extract JSON, then parse and validate.
        response_text = _extract_json_candidate(response_text)
        output_data = json.loads(response_text)
        _validate_analysis_payload(output_data)
        print(f"Successfully parsed {len(output_data['charts'])} charts")
        return jsonify(output_data)
    except json.JSONDecodeError as e:
        print(f"JSON Decode Error: {e}")
        print(f"Attempted to parse: {response_text[:500]}")
        return jsonify({
            "error": "Failed to parse AI response as JSON.",
            "details": str(e),
            "raw_response": response.text[:1000]
        }), 500
    except ValueError as e:
        print(f"Validation Error: {e}")
        return jsonify({
            "error": "Invalid response structure from AI.",
            "details": str(e)
        }), 500
    except Exception as e:
        print(f"Unexpected Error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({
            "error": "Internal server error during analysis.",
            "details": str(e)
        }), 500
# Reports endpoint
# NOTE(review): function name keeps the original "busines_report" spelling —
# it is the public identifier and a route decorator may reference it.
def busines_report():
    """Generate a markdown business report from the posted "json_data" payload."""
    business_data = request.json.get("json_data")
    instructions = """
    You are Quantilytix business analyst. Analyze the following data and generate a comprehensive and insightful business report, including appropriate key perfomance indicators and recommendations Use markdown formatting and tables where necessary. only return the report and nothing else.
    data:
    """
    model_reply = text_model.generate_content(instructions + str(business_data))  # <-- Use text_model
    return jsonify(str(model_reply.text))
# Marketing endpoint
def marketing():
    """Produce a marketing strategy from the posted "json_data" payload."""
    business_data = request.json.get("json_data")
    instructions = """
    You are an Quantilytix Marketing Specialist. Analyze the following data and generate a comprehensive marketing strategy, Only return the marketing strategy. be very creative:
    """
    model_reply = text_model.generate_content(instructions + str(business_data))  # <-- Use text_model
    return jsonify(str(model_reply.text))
# Business Plan endpoint
def business_plan():
    """Draft a funding-oriented business plan from the posted "json_data" payload."""
    business_data = request.json.get("json_data")
    instructions = """
    You are Quantilytix business analyst. Analyze the following data and generate a comprehensive business plan to help the business look for funding and support. Use markdown formatting and tables where necessary. only return the repor and nothing else, the data:
    """
    model_reply = text_model.generate_content(instructions + str(business_data))  # <-- Use text_model
    return jsonify(str(model_reply.text))
# Notifications endpoint
def notifications():
    """Return a brief, dashboard-friendly analysis of the posted "json_data" payload."""
    business_data = request.json.get("json_data")
    instructions = """
    You are Quantilytix business analyst. Write a very brief analysis and marketing tips using this business data. your output should be suitable for a notification dashboard so no quips.
    """
    model_reply = text_model.generate_content(instructions + str(business_data))  # <-- Use text_model
    return jsonify(str(model_reply.text))
if __name__ == "__main__":
    # Binds to all interfaces on port 7860 (the conventional HF Spaces port).
    # NOTE(review): debug=True enables the Werkzeug interactive debugger —
    # confirm this is disabled for any production deployment.
    app.run(debug=True, host="0.0.0.0", port=7860)