Commit ·
0391cfb
1
Parent(s): 938a3f9
feat: demo with yugabyte
Browse files
- .env.sample +3 -0
- gradio_app.py +26 -10
- langchain_mcp_client.py +1 -1
- postgre_mcp_server.py +6 -3
- requirements.txt +0 -0
- table_summary_yugabyte.txt +153 -0
.env.sample
CHANGED
|
@@ -4,6 +4,9 @@ TABLE_SUMMARY_PATH=
|
|
| 4 |
DB_URL=
|
| 5 |
DB_SCHEMA=
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
GEMINI_API_KEY=
|
| 8 |
GEMINI_MODEL=
|
| 9 |
GEMINI_MODEL_PROVIDER=
|
|
|
|
| 4 |
DB_URL=
|
| 5 |
DB_SCHEMA=
|
| 6 |
|
| 7 |
+
# Example DB_URL for YugabyteDB over TLS (sslmode=verify-full, CA certificate in root.crt)
|
| 8 |
+
# DB_URL="postgresql://username:password@us-east-1.14747abb-66a1-48eb-a85f-d434c56d612b.aws.yugabyte.cloud:5433/yugabyte?sslmode=verify-full&sslrootcert=root.crt"
|
| 9 |
+
|
| 10 |
GEMINI_API_KEY=
|
| 11 |
GEMINI_MODEL=
|
| 12 |
GEMINI_MODEL_PROVIDER=
|
gradio_app.py
CHANGED
|
@@ -11,6 +11,7 @@ from memory_store import MemoryStore
|
|
| 11 |
import logging
|
| 12 |
|
| 13 |
|
|
|
|
| 14 |
# ======================================= Load DB configs
|
| 15 |
def load_db_configs():
|
| 16 |
"""Load database configurations from configs.yaml"""
|
|
@@ -121,41 +122,56 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
|
|
| 121 |
scale=4
|
| 122 |
),
|
| 123 |
theme="soft",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
examples=[
|
| 125 |
"Describe the database",
|
| 126 |
"List all tables in the database",
|
| 127 |
"List all tables with columns and data types",
|
| 128 |
-
"How many
|
| 129 |
-
"What are the statuses my of my customers",
|
| 130 |
"Visualize with different colors and show legend",
|
| 131 |
-
"
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
-
"How many users and roles have been created in 2024"
|
| 135 |
],
|
| 136 |
save_history=True,
|
| 137 |
type="messages"
|
| 138 |
)
|
| 139 |
with gr.Column(scale=1):
|
| 140 |
with gr.Accordion("Example Questions", open=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
gr.Markdown("""
|
| 142 |
- 📊 List all tables in database
|
| 143 |
- 👥 Total number of customers
|
| 144 |
- 📈 Visualize it with different colors
|
| 145 |
- 📋 Order statistics for last 6 years
|
| 146 |
-
- 📆
|
| 147 |
""")
|
| 148 |
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
# TODO: maybe we can add a mcp tool to validate the results (those converted to DataFrame) to make sure the valid type is passed to the visualization tool by ReAct agent
|
| 153 |
|
| 154 |
|
| 155 |
if __name__ == "__main__":
|
| 156 |
demo.launch(
|
| 157 |
server_name="0.0.0.0",
|
| 158 |
-
server_port=7860,
|
| 159 |
share=True,
|
| 160 |
debug=True
|
| 161 |
)
|
|
|
|
| 11 |
import logging
|
| 12 |
|
| 13 |
|
| 14 |
+
|
| 15 |
# ======================================= Load DB configs
|
| 16 |
def load_db_configs():
|
| 17 |
"""Load database configurations from configs.yaml"""
|
|
|
|
| 122 |
scale=4
|
| 123 |
),
|
| 124 |
theme="soft",
|
| 125 |
+
# examples=[
|
| 126 |
+
# "Describe the database",
|
| 127 |
+
# "List all tables in the database",
|
| 128 |
+
# "List all tables with columns and data types",
|
| 129 |
+
# "How many customers do you have?",
|
| 130 |
+
# "What are the statuses my of my customers",
|
| 131 |
+
# "Visualize with different colors and show legend",
|
| 132 |
+
# "What are the statues of my customers and how many are in each status, show it by percentage",
|
| 133 |
+
# "Total number of completed orders in six years by customer count show top most 10 customers",
|
| 134 |
+
# "In january how many products has been sold ? group them by year",
|
| 135 |
+
# "How many users and roles have been created in 2024"
|
| 136 |
+
# ],
|
| 137 |
examples=[
|
| 138 |
"Describe the database",
|
| 139 |
"List all tables in the database",
|
| 140 |
"List all tables with columns and data types",
|
| 141 |
+
"How many comments are there per ticket channel (email, chat, portal)? Also Visualize it as a pie chart",
|
|
|
|
| 142 |
"Visualize with different colors and show legend",
|
| 143 |
+
"How many customers are in each industry?",
|
| 144 |
+
"List the 5 most active agents by ticket count in 2024.",
|
| 145 |
+
"How many tickets were reopened at least once?"
|
|
|
|
| 146 |
],
|
| 147 |
save_history=True,
|
| 148 |
type="messages"
|
| 149 |
)
|
| 150 |
with gr.Column(scale=1):
|
| 151 |
with gr.Accordion("Example Questions", open=True):
|
| 152 |
+
# gr.Markdown("""
|
| 153 |
+
# - 📊 List all tables in database
|
| 154 |
+
# - 👥 Total number of customers
|
| 155 |
+
# - 📈 Visualize it with different colors
|
| 156 |
+
# - 📋 Order statistics for last 6 years
|
| 157 |
+
# - 📆 User and role counts in 2024
|
| 158 |
+
# """)
|
| 159 |
gr.Markdown("""
|
| 160 |
- 📊 List all tables in database
|
| 161 |
- 👥 Total number of customers
|
| 162 |
- 📈 Visualize it with different colors
|
| 163 |
- 📋 Order statistics for last 6 years
|
| 164 |
+
- 📆 Average ticket reopen count per year
|
| 165 |
""")
|
| 166 |
|
| 167 |
|
|
|
|
|
|
|
| 168 |
# TODO: maybe we can add a mcp tool to validate the results (those converted to DataFrame) to make sure the valid type is passed to the visualization tool by ReAct agent
|
| 169 |
|
| 170 |
|
| 171 |
if __name__ == "__main__":
|
| 172 |
demo.launch(
|
| 173 |
server_name="0.0.0.0",
|
| 174 |
+
# server_port=7860,
|
| 175 |
share=True,
|
| 176 |
debug=True
|
| 177 |
)
|
langchain_mcp_client.py
CHANGED
|
@@ -15,7 +15,6 @@ from langchain_community.chat_message_histories import ChatMessageHistory
|
|
| 15 |
from memory_store import MemoryStore
|
| 16 |
|
| 17 |
|
| 18 |
-
|
| 19 |
# set_debug(True)
|
| 20 |
|
| 21 |
|
|
@@ -103,6 +102,7 @@ def load_table_summary(path: str) -> str:
|
|
| 103 |
with open(path, 'r') as file:
|
| 104 |
return file.read()
|
| 105 |
|
|
|
|
| 106 |
def get_server_params() -> StdioServerParameters:
|
| 107 |
return StdioServerParameters(
|
| 108 |
command="python",
|
|
|
|
| 15 |
from memory_store import MemoryStore
|
| 16 |
|
| 17 |
|
|
|
|
| 18 |
# set_debug(True)
|
| 19 |
|
| 20 |
|
|
|
|
| 102 |
with open(path, 'r') as file:
|
| 103 |
return file.read()
|
| 104 |
|
| 105 |
+
|
| 106 |
def get_server_params() -> StdioServerParameters:
|
| 107 |
return StdioServerParameters(
|
| 108 |
command="python",
|
postgre_mcp_server.py
CHANGED
|
@@ -15,7 +15,10 @@ import logging
|
|
| 15 |
DEFAULT_QUERY_LIMIT = 100
|
| 16 |
|
| 17 |
# logging info
|
| 18 |
-
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Define our own PromptMessage class if the MCP one isn't available
|
| 21 |
@dataclass
|
|
@@ -148,10 +151,10 @@ You can use the following FastMCP tools to create **read-only** queries (e.g., `
|
|
| 148 |
# Output Format
|
| 149 |
==========================
|
| 150 |
|
| 151 |
-
Present your final answer using the following structure
|
| 152 |
|
| 153 |
# Result
|
| 154 |
-
{{Take the result from the execute_query tool and format it nicely using Markdown. Use a Markdown table for tabular data (rows and columns) including headers. Use bullet points or items in markdown for answers that include lists of names or descriptions. Use plain text for single values or simple messages. Ensure data alignment and clarity.}}
|
| 155 |
|
| 156 |
# Explanation
|
| 157 |
{{Provide a concise explanation or interpretation of the results (and visualization, if applicable) in 1-3 sentences. Explain what the data and visualization (if any) represent in the context of the user's request.}}
|
|
|
|
| 15 |
DEFAULT_QUERY_LIMIT = 100
|
| 16 |
|
| 17 |
# logging info
|
| 18 |
+
# logging.basicConfig(level=logging.INFO)
|
| 19 |
+
|
| 20 |
+
# get logger
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
# Define our own PromptMessage class if the MCP one isn't available
|
| 24 |
@dataclass
|
|
|
|
| 151 |
# Output Format
|
| 152 |
==========================
|
| 153 |
|
| 154 |
+
Present your final answer using the following structure in markdown language:
|
| 155 |
|
| 156 |
# Result
|
| 157 |
+
{{Take the result from the execute_query tool and format it nicely using Markdown. Use a beautiful Markdown table for tabular data (rows and columns) including headers and show such simple results using a table. Use bullet points or items in markdown for answers that include lists of names or descriptions. Use plain text for single values or simple messages. Ensure data alignment and clarity.}}
|
| 158 |
|
| 159 |
# Explanation
|
| 160 |
{{Provide a concise explanation or interpretation of the results (and visualization, if applicable) in 1-3 sentences. Explain what the data and visualization (if any) represent in the context of the user's request.}}
|
requirements.txt
CHANGED
|
Binary files a/requirements.txt and b/requirements.txt differ
|
|
|
table_summary_yugabyte.txt
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Database: helpdesk
|
| 2 |
+
### Description:
|
| 3 |
+
This database is designed for managing customer support tickets and related comments in a helpdesk system. The database consists of three main tables containing customer information, support tickets, and ticket comments.
|
| 4 |
+
|
| 5 |
+
### All Tables:
|
| 6 |
+
- customer
|
| 7 |
+
- ticket
|
| 8 |
+
- ticket_comment
|
| 9 |
+
|
| 10 |
+
### Default Schema:
|
| 11 |
+
- public
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## Table: `customer`
|
| 16 |
+
|
| 17 |
+
### Schema: `public`
|
| 18 |
+
|
| 19 |
+
### All Columns and their Descriptions:
|
| 20 |
+
- `customer_id` (integer): Unique identifier for the customer. Primary key.
|
| 21 |
+
- `name` (text): Name of the customer company.
|
| 22 |
+
- `industry` (text): Industry sector of the customer (Finance, Telecommunications, IT Services, Retail).
|
| 23 |
+
- `contact_email` (text): Contact email address.
|
| 24 |
+
- `contact_phone` (text): Contact phone number.
|
| 25 |
+
- `region` (text): Customer's region (Dammam, Riyadh, Jeddah, Mecca).
|
| 26 |
+
- `created_at` (timestamp): Date when the customer record was created.
|
| 27 |
+
- `status` (text): Current status of the customer (Active, Inactive, Prospect, Suspended, Blacklisted, Pending Verification, Closed).
|
| 28 |
+
|
| 29 |
+
### Relationships with Other Tables (Foreign Keys):
|
| 30 |
+
- `customer_id` is referenced in the `ticket` table.
|
| 31 |
+
|
| 32 |
+
### Cardinality of Relationships:
|
| 33 |
+
- One customer can have multiple support tickets (one-to-many).
|
| 34 |
+
|
| 35 |
+
### Common Use Cases/Example Queries:
|
| 36 |
+
- List active customers in a specific region
|
| 37 |
+
- Analyze customer distribution by industry
|
| 38 |
+
- Generate statistics based on customer status
|
| 39 |
+
- Example SQL Snippet: `SELECT * FROM customer WHERE status = 'Active' AND region = 'Riyadh';`
|
| 40 |
+
|
| 41 |
+
### Data Constraints and Business Rules:
|
| 42 |
+
- `customer_id` must be unique and is the primary key
|
| 43 |
+
- `status` values are restricted to a controlled vocabulary
|
| 44 |
+
- `created_at` must be a valid date
|
| 45 |
+
|
| 46 |
+
### Data Update Frequency/Volatility:
|
| 47 |
+
- Customer information typically changes with new customer registrations or updates to existing customer information
|
| 48 |
+
- Status updates occur regularly
|
| 49 |
+
|
| 50 |
+
### Important Notes/Considerations for Querying:
|
| 51 |
+
- When filtering by status, use the exact values listed above (e.g., `status = 'Active'`); text comparisons are case-sensitive
|
| 52 |
+
- Pay attention to correct region names in region-based analyses
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Table: `ticket`
|
| 57 |
+
|
| 58 |
+
### Schema: `public`
|
| 59 |
+
|
| 60 |
+
### All Columns and their Descriptions:
|
| 61 |
+
- `ticket_id` (text): Unique identifier for the support ticket. Primary key.
|
| 62 |
+
- `status` (text): Ticket status (Open, Resolved-Cancelled, Resolved-Completed).
|
| 63 |
+
- `sub_status` (text): Ticket sub-status (Pending for approval, Cancelled, Completed).
|
| 64 |
+
- `created_at` (timestamp): Date and time when the ticket was created.
|
| 65 |
+
- `service_number` (text): Service number.
|
| 66 |
+
- `customer_id` (integer): Customer ID. Foreign key to customer table.
|
| 67 |
+
- `sr_classification` (text): Service request classification (Financial).
|
| 68 |
+
- `type` (text): Ticket type (Complaint, Request, Inquiry).
|
| 69 |
+
- `area` (text): Main area (Billing).
|
| 70 |
+
- `sub_area` (text): Sub-area (Auto Adjustments, Mass Adjustment, Bulk Balance Transfer, Refunds).
|
| 71 |
+
- `owner` (text): Ticket owner (Agent A, Agent B, Agent C, etc.).
|
| 72 |
+
- `ticket_group` (text): Ticket group (Technical Support, Billing Team, Customer Service, NOC, Field Operations).
|
| 73 |
+
- `description` (text): Ticket description.
|
| 74 |
+
- `priority` (text): Priority level (Low, Medium, High).
|
| 75 |
+
- `re_open_count` (integer): Number of times the ticket was reopened.
|
| 76 |
+
- `follow_up_count` (integer): Number of follow-ups.
|
| 77 |
+
- `collaboration_task_count` (integer): Number of collaboration tasks.
|
| 78 |
+
- `customer_level_value` (text): Customer level (Blue, Platinum).
|
| 79 |
+
- `customer_segment` (text): Customer segment (CS, HP, G2, G4).
|
| 80 |
+
- `cst_area_code` (text): Customer area code.
|
| 81 |
+
- `cst_sub_are_code` (text): Customer sub-area code.
|
| 82 |
+
- `circuit_name` (text): Circuit name.
|
| 83 |
+
- `collaboration_task` (text): Collaboration task.
|
| 84 |
+
- `repeated_ticket_based_on_area_sub_area` (integer): Number of repeated tickets based on area/sub-area.
|
| 85 |
+
- `service_based_repeated_ticket` (integer): Number of service-based repeated tickets.
|
| 86 |
+
- `contact_number` (text): Contact number.
|
| 87 |
+
- `product` (text): Product name (Fiber Enterprise, IP VPN, Hosted PBX, etc.).
|
| 88 |
+
|
| 89 |
+
### Relationships with Other Tables (Foreign Keys):
|
| 90 |
+
- `customer_id` (FK) references `customer` table
|
| 91 |
+
- `ticket_id` is referenced in the `ticket_comment` table
|
| 92 |
+
|
| 93 |
+
### Cardinality of Relationships:
|
| 94 |
+
- Many tickets can belong to one customer (many-to-one with customer)
|
| 95 |
+
- One ticket can have multiple comments (one-to-many with ticket_comment)
|
| 96 |
+
|
| 97 |
+
### Common Use Cases/Example Queries:
|
| 98 |
+
- List of open tickets
|
| 99 |
+
- Ticket distribution by priority
|
| 100 |
+
- Product-based ticket analysis
|
| 101 |
+
- Example SQL Snippet: `SELECT * FROM ticket WHERE status = 'Open' AND priority = 'High';`
|
| 102 |
+
|
| 103 |
+
### Data Constraints and Business Rules:
|
| 104 |
+
- `ticket_id` must be unique and is the primary key
|
| 105 |
+
- `status` and `sub_status` are restricted to specific values
|
| 106 |
+
- `priority` can only be Low, Medium, or High
|
| 107 |
+
|
| 108 |
+
### Data Update Frequency/Volatility:
|
| 109 |
+
- Tickets are continuously created and updated
|
| 110 |
+
- Status and sub_status are frequently updated
|
| 111 |
+
|
| 112 |
+
### Important Notes/Considerations for Querying:
|
| 113 |
+
- Status and sub_status should be evaluated together
|
| 114 |
+
- Consider timezone when querying date-based data
|
| 115 |
+
|
| 116 |
+
---
|
| 117 |
+
|
| 118 |
+
## Table: `ticket_comment`
|
| 119 |
+
|
| 120 |
+
### Schema: `public`
|
| 121 |
+
|
| 122 |
+
### All Columns and their Descriptions:
|
| 123 |
+
- `comment_id` (integer): Unique identifier for the comment. Primary key.
|
| 124 |
+
- `ticket_id` (text): Related ticket ID. Foreign key to ticket table.
|
| 125 |
+
- `created_at` (timestamp): Date and time when the comment was created.
|
| 126 |
+
- `user_type` (text): Type of user making the comment (customer, agent).
|
| 127 |
+
- `channel` (text): Comment channel (email, chat, portal).
|
| 128 |
+
- `comment_text` (text): Comment text.
|
| 129 |
+
|
| 130 |
+
### Relationships with Other Tables (Foreign Keys):
|
| 131 |
+
- `ticket_id` (FK) references `ticket` table
|
| 132 |
+
|
| 133 |
+
### Cardinality of Relationships:
|
| 134 |
+
- Many comments can belong to one ticket (many-to-one with ticket)
|
| 135 |
+
|
| 136 |
+
### Common Use Cases/Example Queries:
|
| 137 |
+
- View all comments for a specific ticket
|
| 138 |
+
- Analysis of comments by channel
|
| 139 |
+
- Distribution of comments by user type
|
| 140 |
+
- Example SQL Snippet: `SELECT * FROM ticket_comment WHERE ticket_id = 'W-137014' ORDER BY created_at DESC;`
|
| 141 |
+
|
| 142 |
+
### Data Constraints and Business Rules:
|
| 143 |
+
- `comment_id` must be unique and is the primary key
|
| 144 |
+
- `user_type` can only be 'customer' or 'agent'
|
| 145 |
+
- `channel` can only be 'email', 'chat', or 'portal'
|
| 146 |
+
|
| 147 |
+
### Data Update Frequency/Volatility:
|
| 148 |
+
- Comments are continuously added
|
| 149 |
+
- Existing comments are rarely updated or deleted
|
| 150 |
+
|
| 151 |
+
### Important Notes/Considerations for Querying:
|
| 152 |
+
- Order comments by `created_at` when reading a ticket's conversation so they appear in chronological order
|
| 153 |
+
- Check consistency in ticket-comment relationships
|