| import ast |
| import gradio as gr |
| from db_work import DatabaseInterface |
| import os |
| from PIL import Image |
| import var_stats |
| import requests |
| from typing import Optional, Dict |
| import json |
| import time |
|
|
| BASE_URL = "https://beeguy74--example-fastapi-fastapi-app.modal.run" |
|
|
| |
| db_interface = None |
| db_connection_status = "❌ Not Connected" |
|
|
| class API: |
| def __init__(self, base_url: str): |
| self.base_url = base_url |
| self.session = requests.Session() |
| self.session.headers.update({"Content-Type": "application/json"}) |
| |
| def generate_code(self, user_request: str) -> tuple[Optional[str], str]: |
| """Generate and execute Python code using langchain""" |
| try: |
| payload = {"user_request": user_request} |
| response = self.session.post( |
| f"{self.base_url}/generate-code", |
| json=payload, |
| timeout=120 |
| ) |
| |
| if response.status_code == 200: |
| result = response.json() |
| return result.get("output", ""), "✅ Code executed successfully" |
| else: |
| return None, f"❌ Code generation failed: {response.status_code} - {response.text}" |
| |
| except Exception as e: |
| return None, f"❌ Error generating code: {str(e)}" |
| |
| def generate_graph(self, graph_type: str, data_dict: Dict) -> tuple[Optional[str], str]: |
| """Generate a graph using matplotlib""" |
| try: |
| payload = { |
| "graph_type": graph_type, |
| "data": json.dumps(data_dict) |
| } |
| |
| response = self.session.post( |
| f"{self.base_url}/generate-graph", |
| json=payload, |
| timeout=120 |
| ) |
| |
| if response.status_code == 200: |
| timestamp = int(time.time()) |
| image_path = f"./graph_{graph_type}_{timestamp}.png" |
| with open(image_path, "wb") as f: |
| f.write(response.content) |
| return image_path, f"✅ {graph_type.title()} chart generated successfully" |
| else: |
| return None, f"❌ Graph generation failed: {response.status_code} - {response.text}" |
| |
| except Exception as e: |
| return None, f"❌ Error generating graph: {str(e)}" |
| |
| def download_file(self, file_path: str) -> tuple[Optional[str], str]: |
| """Download a file from the service""" |
| try: |
| params = {"file_path": file_path} |
| response = self.session.get( |
| f"{self.base_url}/download-file", |
| params=params, |
| timeout=120 |
| ) |
| |
| if response.status_code == 200: |
| timestamp = int(time.time()) |
| local_path = f"./downloaded_{timestamp}_{os.path.basename(file_path)}" |
| with open(local_path, "wb") as f: |
| f.write(response.content) |
| return local_path, f"✅ File downloaded: {file_path}" |
| else: |
| return None, f"❌ Download failed: {response.status_code} - {response.text}" |
| |
| except Exception as e: |
| return None, f"❌ Error downloading file: {str(e)}" |
|
|
| def setup_database_connection(host: str, port: str, database: str, user: str, password: str): |
| """Setup database connection with user-provided configuration""" |
| global db_interface, db_connection_status |
| |
| if not all([host.strip(), port.strip(), database.strip(), user.strip(), password.strip()]): |
| db_connection_status = "❌ All fields are required" |
| return db_connection_status, False |
| |
| try: |
| db_config = { |
| 'host': host.strip(), |
| 'port': int(port.strip()), |
| 'database': database.strip(), |
| 'user': user.strip(), |
| 'password': password.strip() |
| } |
| |
| |
| test_interface = DatabaseInterface(db_config) |
| test_connection = test_interface.get_db_connection() |
| test_connection.close() |
| |
| |
| db_interface = test_interface |
| db_connection_status = f"✅ Connected to {database} at {host}:{port}" |
| return db_connection_status, True |
| |
| except ValueError: |
| db_connection_status = "❌ Port must be a valid number" |
| return db_connection_status, False |
| except Exception as e: |
| db_connection_status = f"❌ Connection failed: {str(e)}" |
| return db_connection_status, False |
|
|
| def get_connection_status(): |
| """Get current database connection status""" |
| return db_connection_status |
|
|
| def check_db_connection(): |
| """Check if database is connected before operations""" |
| if db_interface is None: |
| return False, "❌ Please configure database connection first" |
| return True, "✅ Database connected" |
|
|
| |
| api_service = API(BASE_URL) |
|
|
| |
| def get_schemas(): |
| """### `get_schemas()`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.list_schemas() |
|
|
| def get_db_infos(): |
| """### `get_db_infos()`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.list_database_info() |
|
|
| def get_list_of_tables_in_schema(schema): |
| """### `get_list_of_tables_in_schema(schema_name: str)`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.list_tables_in_schema(schema) |
|
|
| def get_list_of_column_in_table(schema, table): |
| """### `get_list_of_column_in_table(schema_name: str, table_name: str)`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.list_columns_in_table(schema, table) |
|
|
| def run_read_only_query(query: str): |
| """### `run_read_only_query(query: str)`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.read_only_query(query) |
|
|
| def create_table_from_query(table_name: str, source_query: str): |
| """### `create_table_from_query(table_name: str, source_query: str)`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.create_table_from_query(table_name, source_query) |
|
|
| def drop_table(table_name: str): |
| """### `drop_table(table_name: str)`""" |
| connected, status = check_db_connection() |
| if not connected: |
| return status |
| return db_interface.drop_table(table_name) |
|
|
| def create_sample_image(): |
| img_path = "./sample_graph.png" |
| if not os.path.exists(img_path): |
| img = Image.new("RGB", (300, 150), color="lightgreen") |
| img.save(img_path) |
| return img_path |
|
|
| def serve_image_from_path(): |
| """Serve the sample image""" |
| return create_sample_image() |
|
|
| def do_annova(table_name, min_sample_size=0): |
| ''' |
| this function runs the annova on the dataset and render the associated F_score and p_value |
| table_name is the name of the table on which you want to run the ANOVA |
| the selected table MUST have the following signature: |
| |
| groups | measurement |
| |
| exemple with the product_type_age table: |
| |
| type | age |
| ---------- |
| 'Coat', '36' |
| 'Coat', '36' |
| 'Hat/beanie', '32' |
| ... |
| |
| min_sample_size is used to exclude categories that does not have enough measurement. |
| default = 0: all categories are selected |
| |
| return type is: dict |
| { |
| "F-statistic": round(f_stat, 3), |
| "p-value": round(p_value, 3) |
| } |
| ''' |
| return var_stats.anova(db_interface, table_name=table_name, min_sample_size=int(min_sample_size)) |
|
|
| def do_tukey_test(table_name, min_sample_size=0): |
| ''' |
| this function runs a Tukey's HSD (Honestly Significant Difference) test — a post-hoc analysis following ANOVA. |
| It tells you which specific pairs of groups differ significantly in their means |
| IT is meant to be used after you run a successful anova and you obtain sgnificant F-satatistics and p-value |
| table_name is the name of the table on which you want to run the ANOVA |
| the selected table MUST have the following signature: |
| |
| groups | measurement |
| |
| exemple with the product_type_age table: |
| |
| type | age |
| ---------- |
| 'Coat', 36 |
| 'Coat', 36 |
| 'Hat/beanie', 32 |
| ... |
| |
| min_sample_size is used to exclude categories that does not have enough measurement. |
| default = 0: all categories are selected |
| |
| the return result is the raw dataframe that correspond to the pair wize categorie that reject the hypothesis of non statistically difference between two group |
| the signature of the dataframe is the following: |
| group1 | group2 | meandiff p-adj | lower | upper | reject (only true) |
| |
| ''' |
| return var_stats.tukey_test(db_interface, table_name=table_name, min_sample_size=int(min_sample_size)) |
|
|
| def do_tsne_embedding(query): |
| """ |
| |
| this tool allow to run a TSNE dimensionality reduction algorythme and a clustering (HDBSCAN) on top of that. |
| |
| the input query, is a sql query that MUST return a table with at least the item id and the corresponding embeddding. |
| FOR COMPUTATIONAL PURPOSE, THE QUERY YOU SEND MUST NOT RETURN A TABLE GREATER THAN 500 OUTPUT ROWS |
| exemple: |
| result = db_connection.read_only_query(query) |
| result shape: |
| article_id | embedding |
| 0125456 | [0.3, 0.5 ...] |
| |
| the return is a dictionnary that has the following format: |
| |
| return { |
| "ids": ids, |
| "x_axis": tsne_projection_x_list, |
| "y_axis": tsne_projection_y_list, |
| "labels": labels |
| } |
| """ |
|
|
| return var_stats.embedding_clustering(db_interface, query) |
|
|
| def do_vector_centroid(query): |
| """ |
| this tool allow you to compute the centroid of a list of embedding vectors |
| the input query, is a sql query that MUST return a table with only 1 column, the embeddings. |
| exemple: |
| result = db_connection.read_only_query(query) |
| result shape: |
| embedding |
| [0.3, 0.5 ...] |
| |
| the return value is the computed centroid vector, that you can use to work with. |
| """ |
| return var_stats.vector_centroid(db_interface, query) |
|
|
| def embed_text_modal_api(text): |
| """ |
| This function allows you to send a list of text to be embeded by our |
| external model. |
| the format should be the following: |
| [ |
| "text_to_embed 1", |
| "text_to_embed 2", |
| "text_to_embed 3", |
| ... |
| ] |
| the return is the vector embedding corresponding to the text you input or a list of vector in case you gave a list of text. |
| thoses vectors are compatible with the one you can find in the database and can be used to be compared with vectors you'll get by querying the database. |
| |
| """ |
| to_list = ast.literal_eval(text) |
| response = requests.post( |
| "https://beeguy74--embeddings-api-fastapi-app.modal.run/embed", |
| json={"texts": to_list} |
| ) |
| if response.status_code != 200: |
| return f"Embedding API failed: {response.status_code} - {response.text}" |
| embeddings = response.json().get("embeddings") |
|
|
| return embeddings |
|
|
| def generate_code_wrapper(user_request: str): |
| """ |
| ### `generate_code_wrapper(user_request: str)` |
| - **Purpose**: Generate Python code based on user request |
| - **Parameters**: `user_request` - Textual description of the analysis |
| - **Returns**: Generated code and status message |
| - **Use Case**: AI-powered code generation for data analysis""" |
| if not user_request.strip(): |
| return "❌ Please provide a request", "" |
| |
| output, status = api_service.generate_code(user_request) |
| return output or "No output generated", status |
|
|
| def generate_graph_wrapper(graph_type: str, data_json: str): |
| """ |
| ### `generate_graph_wrapper(graph_type: str, data_json: str)` |
| - **Purpose**: Create visualizations using matplotlib |
| - **Parameters**: `graph_type` - Type of chart (bar, line, pie, scatter) |
| - **Returns**: Image file path and status |
| - **Use Case**: Creating charts and graphs for presentations""" |
| try: |
| if not graph_type.strip() or not data_json.strip(): |
| return None, "❌ Please provide both graph type and data" |
| |
| data_dict = json.loads(data_json) |
| image_path, status = api_service.generate_graph(graph_type, data_dict) |
| return image_path, status |
| |
| except json.JSONDecodeError: |
| return None, "❌ Invalid JSON format in data field" |
| except Exception as e: |
| return None, f"❌ Error: {str(e)}" |
|
|
| def query_and_generate_graph_wrapper(query: str, graph_type: str): |
| """ |
| Executes a SQL query and generates a graph visualization from the results. |
| This function is particularly useful when query results are too large for the context window |
| or when visual representation of data is preferred over tabular format. |
| Args: |
| query (str): SQL query to execute. Must return at least two columns where the first |
| column represents labels and the second represents values. |
| graph_type (str): Type of graph to generate (e.g., 'bar', 'line', 'pie', etc.). |
| Returns: |
| tuple: A tuple containing: |
| - image_path (str or None): Path to the generated graph image if successful, None if failed |
| - status (str): Status message indicating success or error details |
| """ |
| if not query.strip(): |
| return None, "❌ Please provide a SQL query." |
| if not graph_type.strip(): |
| return None, "❌ Please provide a graph type." |
|
|
| query_result = run_read_only_query(query) |
|
|
| if isinstance(query_result, str) and query_result.startswith("❌"): |
| return None, f"❌ Query execution failed: {query_result}" |
| |
| if not query_result: |
| return None, "❌ Query returned no data." |
|
|
| data_dict = {} |
| try: |
| if isinstance(query_result, list) and len(query_result) > 0: |
| |
| |
| if isinstance(query_result[0], (list, tuple)) and len(query_result[0]) >= 2: |
| |
| if all(isinstance(item, str) for item in query_result[0]): |
| headers = query_result[0] |
| data_rows = query_result[1:] |
| else: |
| headers = ["labels", "values"] |
| data_rows = query_result |
|
|
| if not data_rows: |
| return None, "❌ Query returned headers but no data rows." |
|
|
| data_dict["labels"] = [str(row[0]) for row in data_rows] |
| data_dict["values"] = [row[1] for row in data_rows] |
| else: |
| return None, "❌ Query result format not suitable for graphing (PostgreSQL). Expected at least two columns." |
| else: |
| return None, "❌ Query returned no data or unexpected format (PostgreSQL)." |
|
|
| if not data_dict.get("labels") or not data_dict.get("values"): |
| return None, "❌ Failed to extract labels and values from query result." |
|
|
| image_path, status = api_service.generate_graph(graph_type, data_dict) |
| return image_path, status |
|
|
| except Exception as e: |
| return None, f"❌ Error processing query result or generating graph: {str(e)}" |
|
|
|
|
| def download_file_wrapper(file_path: str): |
| """### `download_file_wrapper(file_path: str)` |
| - **Purpose**: Download files from the service |
| - **Use Case**: Retrieving generated reports or datasets""" |
| if not file_path.strip(): |
| return "❌ Please provide a file path" |
| |
| local_path, status = api_service.download_file(file_path) |
| return status |
|
|
| def get_mcp_server_instructions(): |
| """ |
| Returns comprehensive usage guidelines and documentation for all MCP server functions. |
| Call this function first to understand available tools, workflows, and best practices. |
| |
| This function provides: |
| - Complete function documentation |
| - Recommended workflows |
| - Best practices for MCP clients |
| - Database schema information |
| - Statistical analysis guidelines |
| """ |
| return """ |
| ## Purpose |
| This MCP server provides comprehensive e-commerce database analytics capabilities, enabling clients to explore database schemas, run queries, perform statistical analysis, and generate AI-powered insights. The server is designed to help users analyze customer behavior, sales patterns, and business metrics from e-commerce data. |
| |
| ## 🎯 Use Cases |
| This MCP server is designed for: |
| - **E-commerce Analytics**: Customer behavior, sales patterns, product performance |
| - **Business Intelligence**: KPI tracking, trend analysis, forecasting |
| - **Statistical Research**: Hypothesis testing, comparative analysis |
| - **Data Exploration**: Schema discovery, data profiling, relationship analysis |
| - **AI-Assisted Insights**: Natural language to analysis, automated reporting |
| |
| ## 📊 Database Schema & Discovery Functions |
| ### `get_schemas()`**Purpose**: Retrieve all database schemas |
| ### `get_db_infos()` **Purpose**: Get comprehensive database information and metadata |
| ### `get_list_of_tables_in_schema(schema_name: str)` **Purpose**: List all tables within a specific schema |
| ### `get_list_of_column_in_table(schema_name: str, table_name: str)` **Purpose**: Get detailed column information for a specific table |
| |
| ## 🔍 Query & Data Manipulation Functions |
| ### `run_read_only_query(query: str)` **Purpose**: Execute read-only SQL queries safely |
| ### `create_table_from_query(table_name: str, source_query: str)` **Purpose**: Create permanent tables from SELECT queries |
| ### `drop_table(table_name: str)` **Purpose**: Remove tables from the database- **Use Case**: Cleaning up temporary analysis tables |
| |
| ## 📈 Statistical Analysis Functions |
| ### `do_annova(table_name: str, min_sample_size: int = 0)` **Purpose**: Perform ANOVA (Analysis of Variance) statistical test- **Use Case**: Testing if there are significant differences between group means |
| ### `do_tukey_test(table_name: str, min_sample_size: int = 0)` **Purpose**: Perform Tukey's HSD post-hoc analysis after ANOVA **Use Case**: Identifying which specific groups differ significantly **Prerequisite**: Should be used after significant ANOVA results |
| |
| ## 🤖 AI-Powered Analytics Functions |
| ### `generate_code_wrapper(user_request: str)`- **Purpose**: Generate and execute Python code using AI based on natural language requests |
| ### `generate_graph_wrapper(graph_type: str, data_json: str)`- **Purpose**: Create visualizations using matplotlib **Use Case**: Creating charts and graphs for presentations- **Example**: `{"labels": ["Q1", "Q2", "Q3"], "values": [100, 150, 200]}` |
| ### `download_file_wrapper(file_path: str)`- **Purpose**: Download files from the analytics service |
| |
| ## 🔄 Recommended Workflows |
| ### 1. Discovery Workflow |
| get_schemas() → Discover available schemas |
| get_list_of_tables_in_schema("public") → Find tables |
| get_list_of_column_in_table("public", "customers") → Understand structure |
| run_read_only_query("SELECT * FROM customers LIMIT 5") → Sample data |
| |
| ### 2. Analysis Workflow |
| run_read_only_query() → Explore data |
| create_table_from_query() → Create analysis datasets |
| do_annova() → Statistical testing |
| do_tukey_test() → Post-hoc analysis |
| generate_graph_wrapper() → Visualize results |
| |
| ### 3. AI-Assisted Analysis |
| generate_code_wrapper("Analyze customer segments") → Get AI insights |
| Use results to guide further manual analysis |
| generate_graph_wrapper() → Visualize AI findings |
| |
| ## ✅ Best Practices for MCP Clients |
| 1. **Start with Discovery**: Always begin by exploring schemas and tables before analysis |
| 2. **Use Read-Only Queries**: Prefer `run_read_only_query()` for exploration to maintain data safety |
| 3. **Statistical Validation**: Use `do_annova()` before `do_tukey_test()` for proper statistical workflow |
| 4. **AI Enhancement**: Use `generate_code_wrapper()` for complex analysis that would be difficult to code manually |
| 5. **Clean Up**: Use `drop_table()` to remove temporary analysis tables when done |
| 6. **Error Handling**: All functions return status indicators - check for errors before proceeding |
| 7. **Data Safety**: Core tables (transactions, customers, articles) are protected from modification""" |
|
|
| |
| with gr.Blocks(title="Database Configuration") as tab01: |
| gr.Markdown("# 🔌 Database Configuration") |
| gr.Markdown("*Configure your database connection before using the analytics platform*") |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🗄️ Database Connection") |
| host_input = gr.Textbox(label="Host", placeholder="database.example.com", value="") |
| port_input = gr.Textbox(label="Port", placeholder="5432", value="") |
| database_input = gr.Textbox(label="Database", placeholder="my_database", value="") |
| user_input = gr.Textbox(label="User", placeholder="db_user", value="") |
| password_input = gr.Textbox(label="Password", type="password", placeholder="••••••••", value="") |
| |
| connect_btn = gr.Button("🔌 Connect to Database", variant="primary") |
| |
| with gr.Column(scale=1): |
| connection_status = gr.Textbox(label="🔌 Connection Status", value=db_connection_status, interactive=False) |
| gr.Markdown("### ℹ️ Instructions") |
| gr.Markdown(""" |
| 1. **Fill in your database credentials** |
| 2. **Click 'Connect to Database'** |
| 3. **Wait for successful connection** |
| 4. **Proceed to other tabs once connected** |
| |
| **Note**: All database operations require a valid connection. |
| """) |
| |
| def handle_connection(host, port, database, user, password): |
| status, success = setup_database_connection(host, port, database, user, password) |
| return status |
| |
| connect_btn.click( |
| handle_connection, |
| inputs=[host_input, port_input, database_input, user_input, password_input], |
| outputs=connection_status |
| ) |
|
|
| |
| with gr.Blocks(title="Database Operations") as tab1: |
| gr.Markdown("# 🗄️ Database Operations") |
| gr.Markdown("*Explore database schema, tables, and run queries*") |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🗄️ Database Schema") |
| discover_btn = gr.Button("📋 Get Schemas", variant="primary") |
| database_info_btn = gr.Button("ℹ️ Get Database Info", variant="secondary") |
| with gr.Column(scale=2): |
| schema_info = gr.Textbox(label="📋 Schema Information", lines=5) |
| db_info = gr.Textbox(label="ℹ️ Database Information", lines=5) |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 📊 Table Explorer") |
| table_in_schema_input = gr.Textbox(label="Schema Name", placeholder="public") |
| table_in_schema_btn = gr.Button("Get Tables") |
|
|
| with gr.Column(scale=2): |
| table_in_schema = gr.Textbox(label="📊 Tables in Schema", lines=5) |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 📄 Column Explorer") |
| schema_input = gr.Textbox(label="Schema Name", placeholder="public") |
| table_input = gr.Textbox(label="Table Name", placeholder="customers") |
| column_btn = gr.Button("Get Columns") |
|
|
| with gr.Column(scale=2): |
| column_output = gr.Textbox(label="📄 Table Columns", lines=5) |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🔍 SQL Query") |
| query_input = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM customers LIMIT 10") |
| query_btn = gr.Button("Execute Query", variant="primary") |
|
|
| with gr.Column(scale=2): |
| query_output = gr.Textbox(label="🔍 Query Results", lines=8) |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🔍 Create Table") |
| table_name_input = gr.Textbox(label="Table Name", placeholder="table") |
| source_query_input = gr.Textbox(label="Source Query", lines=3, placeholder="SELECT * FROM customers LIMIT 10") |
| create_table_from_query_btn = gr.Button("Create Table", variant="primary") |
|
|
| with gr.Column(scale=2): |
| table_status = gr.Textbox(label="table status") |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🔍 Drop Table") |
| drop_table_name_input = gr.Textbox(label="Table Name", placeholder="table") |
| drop_table_btn = gr.Button("Drop Table", variant="primary") |
| |
| gr.Markdown("### 🎨 Sample Visualization") |
| generate_sample_btn = gr.Button("Generate Sample", variant="secondary") |
| |
| with gr.Column(scale=2): |
| drop_table_status = gr.Textbox(label="drop table status") |
| output_image = gr.Image(label="🎨 Generated Visualization", type="filepath") |
| |
| |
| discover_btn.click(get_schemas, outputs=schema_info) |
| database_info_btn.click(get_db_infos, outputs=db_info) |
| table_in_schema_btn.click(get_list_of_tables_in_schema, inputs=table_in_schema_input, outputs=table_in_schema) |
| column_btn.click(get_list_of_column_in_table, inputs=[schema_input, table_input], outputs=column_output) |
| query_btn.click(run_read_only_query, inputs=query_input, outputs=query_output) |
| generate_sample_btn.click(serve_image_from_path, outputs=output_image) |
| create_table_from_query_btn.click(create_table_from_query, inputs=[table_name_input, source_query_input], outputs=table_status) |
| drop_table_btn.click(drop_table, inputs=drop_table_name_input, outputs=drop_table_status) |
|
|
| |
| with gr.Blocks(title="AI Analytics") as tab2: |
| gr.Markdown("# 🤖 AI-Powered Analytics") |
| gr.Markdown("*Generate code, create visualizations, and manage files with AI*") |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 🤖 AI Code Generation") |
| code_request_input = gr.Textbox( |
| label="Analysis Request", |
| lines=3, |
| placeholder="Analyze customer purchase patterns..." |
| ) |
| generate_code_btn = gr.Button("🧠 Generate Code", variant="primary") |
|
|
| with gr.Column(scale=2): |
| code_output = gr.Textbox(label="🤖 AI Generated Code/Analysis", lines=10) |
| code_status = gr.Textbox(label="Code Status", lines=2) |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 📈 Graph Generation") |
| graph_type_input = gr.Textbox(label="Graph Type", placeholder="bar, line, pie, scatter") |
| data_dict_input = gr.Textbox( |
| label="Data (JSON format)", |
| lines=3, |
| placeholder='{"labels": ["A", "B", "C"], "values": [1, 2, 3]}' |
| ) |
| generate_graph_btn = gr.Button("📊 Generate Graph", variant="primary") |
| gr.Markdown("### 🔍 Query & Generate Graph") |
| query_for_graph_input = gr.Textbox( |
| label="SQL Query for Graph", |
| lines=3, |
| placeholder="SELECT category, COUNT(*) FROM sales GROUP BY category" |
| ) |
| graph_type_for_query_input = gr.Textbox(label="Graph Type", placeholder="bar, line, pie, scatter") |
| query_and_graph_btn = gr.Button("📈 Query & Generate Graph", variant="primary") |
|
|
| with gr.Column(scale=2): |
| graph_output = gr.Image(label="📈 Generated Graph", type="filepath") |
| graph_status = gr.Textbox(label="Graph Status", lines=2) |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### 📁 File Download") |
| file_path_input = gr.Textbox(label="File Path", placeholder="path/to/file.csv") |
| download_btn = gr.Button("📥 Download File", variant="secondary") |
|
|
| gr.Markdown("### 📍 Embed text") |
| embed_text_input = gr.Textbox(label="Text to embed", placeholder="lorem ipsum") |
| embed_btn = gr.Button("📍 Embed text", variant="secondary") |
| |
| with gr.Column(scale=2): |
| download_status = gr.Textbox(label="📁 Download Status", lines=3) |
| embed_text = gr.Textbox(label="Vector") |
| |
| |
| generate_code_btn.click( |
| generate_code_wrapper, |
| inputs=code_request_input, |
| outputs=[code_output, code_status] |
| ) |
| generate_graph_btn.click( |
| generate_graph_wrapper, |
| inputs=[graph_type_input, data_dict_input], |
| outputs=[graph_output, graph_status] |
| ) |
| download_btn.click( |
| download_file_wrapper, |
| inputs=file_path_input, |
| outputs=download_status |
| ) |
| embed_btn.click( |
| embed_text_modal_api, |
| inputs=embed_text_input, |
| outputs=embed_text |
| ) |
| query_and_graph_btn.click( |
| query_and_generate_graph_wrapper, |
| inputs=[query_for_graph_input, graph_type_for_query_input], |
| outputs=[graph_output, graph_status] |
| ) |
|
|
| |
| with gr.Blocks(title="Statistical Analysis") as tab4: |
| gr.Markdown("# 📊 Statistical Analysis") |
| gr.Markdown("*Run statistical tests on your data*") |
|
|
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### enter a dict that comply for annova function") |
| annova_input = gr.Textbox(label="annova") |
| annova_min_sample_input = gr.Textbox(label="min sample size for annova") |
| annova_btn = gr.Button("run annova") |
|
|
| gr.Markdown("### enter a table that comply for tukey function") |
| tukey_input = gr.Textbox(label="tukey") |
| tukey_min_sample_input = gr.Textbox(label="min sample size for tukey") |
| tukey_btn = gr.Button("run tukey") |
|
|
| gr.Markdown("### Enter a query that comply with the requested embedding format") |
| tsne_cluster_input = gr.Textbox(label="embedding_table") |
| tsne_cluster_btn = gr.Button("run TSNE") |
|
|
| gr.Markdown("### Enter a query that comply with the requested embedding centroid format") |
| vector_centroid_input = gr.Textbox(label="embedding_table_for_vector") |
| vector_centroid_btn = gr.Button("Compute centroid") |
|
|
|
|
| with gr.Column(scale=2): |
| annova_output = gr.Textbox(label="annova output") |
| tukey_output = gr.Textbox(label="tukey output") |
| tsne_output = gr.Textbox(label="tsne_clustering output") |
| vector_centroid_output = gr.Textbox(label="Centroid") |
| |
| |
| annova_btn.click(do_annova, inputs=[annova_input, annova_min_sample_input], outputs=annova_output) |
| tukey_btn.click(do_tukey_test, inputs=[tukey_input, tukey_min_sample_input], outputs=tukey_output) |
| tsne_cluster_btn.click(do_tsne_embedding, inputs=tsne_cluster_input, outputs=tsne_output) |
| vector_centroid_btn.click(do_vector_centroid, inputs=vector_centroid_input, outputs=vector_centroid_output) |
|
|
| with gr.Blocks(title="MCP guidelines") as tab5: |
| gr.Markdown("### 📚 Server Documentation") |
| instructions_btn = gr.Button("📖 Get MCP Instructions", variant="secondary") |
| instructions_output = gr.Textbox(label="📚 MCP Server Instructions", lines=15) |
| instructions_btn.click(get_mcp_server_instructions, outputs=instructions_output) |
|
|
| with gr.Blocks(title="Application Guide") as tab0: |
| gr.Markdown("# 🎯 E-commerce Database Analytics MCP Server") |
| gr.Markdown("*Your comprehensive guide to data analysis and business intelligence*") |
| |
| with gr.Row(): |
| with gr.Column(): |
| gr.Markdown(""" |
| ## 🌟 Welcome to Your Data Analytics MCP Server! |
| |
| ## Please checkout our video demo: https://www.youtube.com/watch?v=3sx99xgLpfc 👀🤗 |
| |
| This is a comprehensive e-commerce database analytics MCP Server that combines traditional database operations with cutting-edge AI-powered insights. Whether you're a data analyst, business intelligence professional, or researcher, this MCP server provides everything you need to explore, analyze, and extract insights from e-commerce data. |
| """) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ## 🎯 What This MCP Server Does |
| """) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ### 🗄️ **Database Operations** |
| - **Explore database structure** (schemas, tables, columns) |
| - **Execute safe SQL queries** (read-only for data safety) |
| - **Create and manage tables** for analysis |
| - **Sample and preview data** before deep analysis |
| |
| ### 🤖 **AI-Powered Analytics** |
| - **Natural language to code generation** - Ask questions in plain English |
| - **Automated data visualization** - Generate charts and graphs instantly |
| - **File management** - Download reports and datasets |
| - **Text embedding** - Convert text to vector representations |
| """) |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ### 📊 **Advanced Statistical Analysis** |
| - **ANOVA testing** - Compare means across groups |
| - **Tukey post-hoc tests** - Identify specific group differences |
| - **t-SNE clustering** - Visualize high-dimensional data |
| - **Vector analysis** - Calculate centroids and similarities |
| |
| ### 📚 **MCP Server Integration** |
| - **Complete API documentation** for external clients |
| - **Function reference** for all available operations |
| - **Integration guidelines** for seamless connectivity |
| """) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ## 🚀 How to Use Our MCP Server |
| """) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ### For Beginners: Start Here! |
| |
| 1. **🗄️ Go to "Database Operations" tab** |
| - Click "Get Schemas" to see available data |
| - Explore tables with "Get Tables" |
| - Check table structure with "Get Columns" |
| - Run your first query: `SELECT * FROM customers LIMIT 10` |
| |
| 2. **🤖 Try "AI Analytics" tab** |
| - Ask in plain English: "Show me customer demographics" |
| - Generate charts by providing data in JSON format |
| - Let AI write complex analysis code for you |
| |
| 3. **📊 Use "Statistical Analysis" tab** |
| - Test hypotheses with ANOVA |
| - Find group differences with Tukey tests |
| - Visualize data patterns with t-SNE |
| """) |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ### For Advanced Users: |
| - **Create custom analysis tables** using SQL queries |
| - **Build statistical models** with embedding vectors |
| - **Generate automated reports** through AI integration |
| - **Develop MCP client applications** using our API |
| """) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown(""" |
| ## 🔧 Main Methods & Functions |
| ### 🗄️ Database Discovery |
| ```python |
| get_schemas() # List all database schemas |
| get_db_infos() # Get database metadata |
| get_list_of_tables_in_schema(schema) # Find tables in schema |
| get_list_of_column_in_table(schema, table) # Get column information |
| ``` |
| |
| ### 📊 Data Analysis |
| ```python |
| run_read_only_query(sql_query) # Execute safe SELECT queries |
| create_table_from_query(name, query) # Create tables from analysis |
| drop_table(table_name) # Remove temporary tables |
| ``` |
| |
| ### 🤖 AI-Powered Functions |
| ```python |
| generate_code_wrapper(natural_language) # Convert questions to code |
| generate_graph_wrapper(type, data_json) # Create visualizations |
| query_and_generate_graph(query, type) # Query data + create graph |
| embed_text_modal_api(text) # Convert text to vectors |
| ``` |
| |
| ### 📈 Statistical Methods |
| ```python |
| do_annova(table, min_sample_size) # ANOVA statistical test |
| do_tukey_test(table, min_sample_size) # Tukey post-hoc analysis |
| do_tsne_embedding(query) # t-SNE clustering |
| do_vector_centroid(query) # Calculate vector centroids |
| ``` |
| |
| ### 📁 File Operations |
| ```python |
| download_file_wrapper(file_path) # Download generated files |
| get_mcp_server_instructions() # Get complete API docs |
| ``` |
| """) |
| with gr.Row(): |
| with gr.Accordion("💡 Common Use Cases & Examples!", open=False): |
| gr.Markdown(""" |
| ### 🛍️ **E-commerce Analysis** |
| - **Customer Segmentation**: "Find customer groups by purchase behavior" |
| - **Sales Performance**: "Compare revenue across product categories" |
| - **Market Analysis**: "Identify trending products and seasonal patterns" |
| |
| ### 📊 **Business Intelligence** |
| - **KPI Monitoring**: Track key performance indicators |
| - **Trend Analysis**: Identify business trends and patterns |
| - **Forecasting**: Predict future sales and customer behavior |
| |
| ### 🔬 **Research & Analytics** |
| - **Hypothesis Testing**: Validate business assumptions with statistics |
| - **A/B Testing**: Compare different strategies or products |
| - **Data Mining**: Discover hidden patterns in large datasets |
| """) |
| with gr.Row(): |
| with gr.Accordion("🎨 Visualization Examples", open=False): |
| gr.Markdown(""" |
| ### Create Charts Instantly: |
| ```json |
| { |
| "labels": ["Q1", "Q2", "Q3", "Q4"], |
| "values": [150000, 180000, 220000, 195000] |
| } |
| ``` |
| **Result**: Beautiful bar/line/pie charts in seconds! |
| |
| ### AI-Generated Analysis: |
| **Input**: "Analyze customer age distribution by product category" |
| **Output**: Complete Python code + statistical analysis + visualizations |
| |
| ## ⚡ Quick Start Examples |
| |
| ### Example 1: Customer Analysis |
| ```sql |
| -- Step 1: Explore customer data |
| SELECT * FROM customers LIMIT 10; |
| |
| -- Step 2: Analyze demographics |
| SELECT age_group, COUNT(*) |
| FROM customers |
| GROUP BY age_group; |
| |
| -- Step 3: Create analysis table |
| CREATE TABLE age_analysis AS |
| SELECT age_group, AVG(total_spent) as avg_spending |
| FROM customers |
| GROUP BY age_group; |
| ``` |
| |
| ### Example 2: AI-Powered Insights |
| **Natural Language Request**: |
| "Compare average order values between different customer segments and create a visualization" |
| |
| **AI Will Generate**: |
| - SQL queries to extract relevant data |
| - Statistical analysis of the differences |
| - Visualization code for clear presentation |
| - Interpretation of the results |
| |
| """) |
| with gr.Row(): |
| with gr.Accordion("🛡️ Safety Features", open=False): |
| gr.Markdown(""" |
| - **Read-Only Queries**: Data exploration without modification risk |
| - **Core Table Protection**: System tables cannot be accidentally deleted |
| - **Error Handling**: Clear feedback for all operations |
| - **Transaction Safety**: Automatic rollback on errors |
| """) |
| with gr.Row(): |
| with gr.Accordion("🎯 Best Practices", open=False): |
| gr.Markdown(""" |
| 1. **Start with Discovery**: Always explore data structure first |
| 2. **Sample Before Analyzing**: Check data quality with small samples |
| 3. **Use Descriptive Names**: Name analysis tables clearly |
| 4. **Leverage AI**: Use natural language for complex analysis |
| 5. **Clean Up**: Remove temporary tables when finished |
| 6. **Check Results**: Verify outputs before making decisions |
| """) |
| with gr.Row(): |
| with gr.Accordion("🚀 Ready to Begin?", open=False): |
| gr.Markdown(""" |
| ### Choose your starting point: |
| - **New to the this MCP Server?** → Start with "Database Operations" |
| - **Want AI help?** → Jump to "AI Analytics" |
| - **Need statistics?** → Go to "Statistical Analysis" |
| - **Building integrations?** → Check "MCP Guidelines" |
| |
| **Let's turn your data into insights! 📈✨** |
| """) |
|
|
| |
| interface = gr.TabbedInterface( |
| [tab0, tab01, tab1, tab2, tab4, tab5], |
| tab_names=["🎯 Guide","🔌 Database Setup", "🗄️ Database Operations", "🤖 AI Analytics", "📊 Statistical Analysis", "📊 MCP guidelines"], |
| title="E-commerce Database Analytics MCP Server", |
| theme=gr.themes.Soft() |
| ) |
|
|
| |
| if __name__ == "__main__": |
| print("🚀 Starting E-commerce Database Analytics MCP Server...") |
| print(f"🌐 Dashboard: http://localhost:7860") |
| print("🔗 Integrated with FastAPI service for AI analytics") |
| |
| interface.launch(server_name="0.0.0.0", server_port=7860, share=True, mcp_server=True) |