amirkiarafiei committed on
Commit
0391cfb
·
1 Parent(s): 938a3f9

feat: demo with yugabyte

Browse files
.env.sample CHANGED
@@ -4,6 +4,9 @@ TABLE_SUMMARY_PATH=
4
  DB_URL=
5
  DB_SCHEMA=
6
 
 
 
 
7
  GEMINI_API_KEY=
8
  GEMINI_MODEL=
9
  GEMINI_MODEL_PROVIDER=
 
4
  DB_URL=
5
  DB_SCHEMA=
6
 
7
+ # Example DB_URL for YugabyteDB over TLS (sslmode=verify-full; sslrootcert is the path to the cluster's root.crt)
8
+ # DB_URL="postgresql://username:password@us-east-1.14747abb-66a1-48eb-a85f-d434c56d612b.aws.yugabyte.cloud:5433/yugabyte?sslmode=verify-full&sslrootcert=root.crt"
9
+
10
  GEMINI_API_KEY=
11
  GEMINI_MODEL=
12
  GEMINI_MODEL_PROVIDER=
gradio_app.py CHANGED
@@ -11,6 +11,7 @@ from memory_store import MemoryStore
11
  import logging
12
 
13
 
 
14
  # ======================================= Load DB configs
15
  def load_db_configs():
16
  """Load database configurations from configs.yaml"""
@@ -121,41 +122,56 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
121
  scale=4
122
  ),
123
  theme="soft",
 
 
 
 
 
 
 
 
 
 
 
 
124
  examples=[
125
  "Describe the database",
126
  "List all tables in the database",
127
  "List all tables with columns and data types",
128
- "How many customers do you have?",
129
- "What are the statuses my of my customers",
130
  "Visualize with different colors and show legend",
131
- "What are the statues of my customers and how many are in each status, show it by percentage",
132
- "Total number of completed orders in six years by customer count show top most 10 customers",
133
- "In january how many products has been sold ? group them by year",
134
- "How many users and roles have been created in 2024"
135
  ],
136
  save_history=True,
137
  type="messages"
138
  )
139
  with gr.Column(scale=1):
140
  with gr.Accordion("Example Questions", open=True):
 
 
 
 
 
 
 
141
  gr.Markdown("""
142
  - 📊 List all tables in database
143
  - 👥 Total number of customers
144
  - 📈 Visualize it with different colors
145
  - 📋 Order statistics for last 6 years
146
- - 📆 User and role counts in 2024
147
  """)
148
 
149
 
150
-
151
-
152
  # TODO: maybe we can add a mcp tool to validate the results (those converted to DataFrame) to make sure the valid type is passed to the visualization tool by ReAct agent
153
 
154
 
155
  if __name__ == "__main__":
156
  demo.launch(
157
  server_name="0.0.0.0",
158
- server_port=7860,
159
  share=True,
160
  debug=True
161
  )
 
11
  import logging
12
 
13
 
14
+
15
  # ======================================= Load DB configs
16
  def load_db_configs():
17
  """Load database configurations from configs.yaml"""
 
122
  scale=4
123
  ),
124
  theme="soft",
125
+ # examples=[
126
+ # "Describe the database",
127
+ # "List all tables in the database",
128
+ # "List all tables with columns and data types",
129
+ # "How many customers do you have?",
130
+ # "What are the statuses my of my customers",
131
+ # "Visualize with different colors and show legend",
132
+ # "What are the statues of my customers and how many are in each status, show it by percentage",
133
+ # "Total number of completed orders in six years by customer count show top most 10 customers",
134
+ # "In january how many products has been sold ? group them by year",
135
+ # "How many users and roles have been created in 2024"
136
+ # ],
137
  examples=[
138
  "Describe the database",
139
  "List all tables in the database",
140
  "List all tables with columns and data types",
141
+ "How many comments are there per ticket channel (email, chat, portal)? Also Visualize it as a pie chart",
 
142
  "Visualize with different colors and show legend",
143
+ "How many customers are in each industry?",
144
+ "List the 5 most active agents by ticket count in 2024.",
145
+ "How many tickets were reopened at least once?"
 
146
  ],
147
  save_history=True,
148
  type="messages"
149
  )
150
  with gr.Column(scale=1):
151
  with gr.Accordion("Example Questions", open=True):
152
+ # gr.Markdown("""
153
+ # - 📊 List all tables in database
154
+ # - 👥 Total number of customers
155
+ # - 📈 Visualize it with different colors
156
+ # - 📋 Order statistics for last 6 years
157
+ # - 📆 User and role counts in 2024
158
+ # """)
159
  gr.Markdown("""
160
  - 📊 List all tables in database
161
  - 👥 Total number of customers
162
  - 📈 Visualize it with different colors
163
  - 📋 Order statistics for last 6 years
164
+ - 📆 Average ticket reopen count per year
165
  """)
166
 
167
 
 
 
168
  # TODO: maybe we can add a mcp tool to validate the results (those converted to DataFrame) to make sure the valid type is passed to the visualization tool by ReAct agent
169
 
170
 
171
  if __name__ == "__main__":
172
  demo.launch(
173
  server_name="0.0.0.0",
174
+ # server_port=7860,
175
  share=True,
176
  debug=True
177
  )
langchain_mcp_client.py CHANGED
@@ -15,7 +15,6 @@ from langchain_community.chat_message_histories import ChatMessageHistory
15
  from memory_store import MemoryStore
16
 
17
 
18
-
19
  # set_debug(True)
20
 
21
 
@@ -103,6 +102,7 @@ def load_table_summary(path: str) -> str:
103
  with open(path, 'r') as file:
104
  return file.read()
105
 
 
106
  def get_server_params() -> StdioServerParameters:
107
  return StdioServerParameters(
108
  command="python",
 
15
  from memory_store import MemoryStore
16
 
17
 
 
18
  # set_debug(True)
19
 
20
 
 
102
  with open(path, 'r') as file:
103
  return file.read()
104
 
105
+
106
  def get_server_params() -> StdioServerParameters:
107
  return StdioServerParameters(
108
  command="python",
postgre_mcp_server.py CHANGED
@@ -15,7 +15,10 @@ import logging
15
  DEFAULT_QUERY_LIMIT = 100
16
 
17
  # logging info
18
- logging.basicConfig(level=logging.INFO)
 
 
 
19
 
20
  # Define our own PromptMessage class if the MCP one isn't available
21
  @dataclass
@@ -148,10 +151,10 @@ You can use the following FastMCP tools to create **read-only** queries (e.g., `
148
  # Output Format
149
  ==========================
150
 
151
- Present your final answer using the following structure **exactly** in markdown language. When necessary, bold the important parts of your answer or use `` for inline code blocks:
152
 
153
  # Result
154
- {{Take the result from the execute_query tool and format it nicely using Markdown. Use a Markdown table for tabular data (rows and columns) including headers. Use bullet points or items in markdown for answers that include lists of names or descriptions. Use plain text for single values or simple messages. Ensure data alignment and clarity.}}
155
 
156
  # Explanation
157
  {{Provide a concise explanation or interpretation of the results (and visualization, if applicable) in 1-3 sentences. Explain what the data and visualization (if any) represent in the context of the user's request.}}
 
15
  DEFAULT_QUERY_LIMIT = 100
16
 
17
  # logging info
18
+ # logging.basicConfig(level=logging.INFO)
19
+
20
+ # get logger
21
+ logger = logging.getLogger(__name__)
22
 
23
  # Define our own PromptMessage class if the MCP one isn't available
24
  @dataclass
 
151
  # Output Format
152
  ==========================
153
 
154
+ Present your final answer using the following structure in markdown language:
155
 
156
  # Result
157
+ {{Take the result from the execute_query tool and format it nicely using Markdown. Use a beautiful Markdown table for tabular data (rows and columns) including headers and show such simple results using a table. Use bullet points or items in markdown for answers that include lists of names or descriptions. Use plain text for single values or simple messages. Ensure data alignment and clarity.}}
158
 
159
  # Explanation
160
  {{Provide a concise explanation or interpretation of the results (and visualization, if applicable) in 1-3 sentences. Explain what the data and visualization (if any) represent in the context of the user's request.}}
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
table_summary_yugabyte.txt ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Database: helpdesk
2
+ ### Description:
3
+ This database is designed for managing customer support tickets and related comments in a helpdesk system. The database consists of three main tables containing customer information, support tickets, and ticket comments.
4
+
5
+ ### All Tables:
6
+ - customer
7
+ - ticket
8
+ - ticket_comment
9
+
10
+ ### Default Schema:
11
+ - public
12
+
13
+ ---
14
+
15
+ ## Table: `customer`
16
+
17
+ ### Schema: `public`
18
+
19
+ ### All Columns and their Descriptions:
20
+ - `customer_id` (integer): Unique identifier for the customer. Primary key.
21
+ - `name` (text): Name of the customer company.
22
+ - `industry` (text): Industry sector of the customer (Finance, Telecommunications, IT Services, Retail).
23
+ - `contact_email` (text): Contact email address.
24
+ - `contact_phone` (text): Contact phone number.
25
+ - `region` (text): Customer's region (Dammam, Riyadh, Jeddah, Mecca).
26
+ - `created_at` (timestamp): Date when the customer record was created.
27
+ - `status` (text): Current status of the customer (Active, Inactive, Prospect, Suspended, Blacklisted, Pending Verification, Closed).
28
+
29
+ ### Relationships with Other Tables (Foreign Keys):
30
+ - `customer_id` is referenced in the `ticket` table.
31
+
32
+ ### Cardinality of Relationships:
33
+ - One customer can have multiple support tickets (one-to-many).
34
+
35
+ ### Common Use Cases/Example Queries:
36
+ - List active customers in a specific region
37
+ - Analyze customer distribution by industry
38
+ - Generate statistics based on customer status
39
+ - Example SQL Snippet: `SELECT * FROM customer WHERE status = 'Active' AND region = 'Riyadh';`
40
+
41
+ ### Data Constraints and Business Rules:
42
+ - `customer_id` must be unique and is the primary key
43
+ - `status` values are restricted to a controlled vocabulary
44
+ - `created_at` must be a valid timestamp
45
+
46
+ ### Data Update Frequency/Volatility:
47
+ - Customer records change when new customers are registered or when an existing customer's details are updated
48
+ - Status updates occur regularly
49
+
50
+ ### Important Notes/Considerations for Querying:
51
+ - When filtering by `status`, use the exact values listed above (e.g., `'Active'`, `'Pending Verification'`)
52
+ - In region-based analyses, use the exact region names listed above (Dammam, Riyadh, Jeddah, Mecca)
53
+
54
+ ---
55
+
56
+ ## Table: `ticket`
57
+
58
+ ### Schema: `public`
59
+
60
+ ### All Columns and their Descriptions:
61
+ - `ticket_id` (text): Unique identifier for the support ticket. Primary key.
62
+ - `status` (text): Ticket status (Open, Resolved-Cancelled, Resolved-Completed).
63
+ - `sub_status` (text): Ticket sub-status (Pending for approval, Cancelled, Completed).
64
+ - `created_at` (timestamp): Date and time when the ticket was created.
65
+ - `service_number` (text): Service number.
66
+ - `customer_id` (integer): Customer ID. Foreign key to customer table.
67
+ - `sr_classification` (text): Service request classification (Financial).
68
+ - `type` (text): Ticket type (Complaint, Request, Inquiry).
69
+ - `area` (text): Main area (Billing).
70
+ - `sub_area` (text): Sub-area (Auto Adjustments, Mass Adjustment, Bulk Balance Transfer, Refunds).
71
+ - `owner` (text): Ticket owner (Agent A, Agent B, Agent C, etc.).
72
+ - `ticket_group` (text): Ticket group (Technical Support, Billing Team, Customer Service, NOC, Field Operations).
73
+ - `description` (text): Ticket description.
74
+ - `priority` (text): Priority level (Low, Medium, High).
75
+ - `re_open_count` (integer): Number of times the ticket was reopened.
76
+ - `follow_up_count` (integer): Number of follow-ups.
77
+ - `collaboration_task_count` (integer): Number of collaboration tasks.
78
+ - `customer_level_value` (text): Customer level (Blue, Platinum).
79
+ - `customer_segment` (text): Customer segment (CS, HP, G2, G4).
80
+ - `cst_area_code` (text): Customer area code.
81
+ - `cst_sub_are_code` (text): Customer sub-area code.
82
+ - `circuit_name` (text): Circuit name.
83
+ - `collaboration_task` (text): Collaboration task.
84
+ - `repeated_ticket_based_on_area_sub_area` (integer): Number of repeated tickets based on area/sub-area.
85
+ - `service_based_repeated_ticket` (integer): Number of service-based repeated tickets.
86
+ - `contact_number` (text): Contact number.
87
+ - `product` (text): Product name (Fiber Enterprise, IP VPN, Hosted PBX, etc.).
88
+
89
+ ### Relationships with Other Tables (Foreign Keys):
90
+ - `customer_id` (FK) references `customer` table
91
+ - `ticket_id` is referenced in the `ticket_comment` table
92
+
93
+ ### Cardinality of Relationships:
94
+ - Many tickets can belong to one customer (many-to-one with `customer`)
95
+ - One ticket can have multiple comments (one-to-many with ticket_comment)
96
+
97
+ ### Common Use Cases/Example Queries:
98
+ - List of open tickets
99
+ - Ticket distribution by priority
100
+ - Product-based ticket analysis
101
+ - Example SQL Snippet: `SELECT * FROM ticket WHERE status = 'Open' AND priority = 'High';`
102
+
103
+ ### Data Constraints and Business Rules:
104
+ - `ticket_id` must be unique and is the primary key
105
+ - `status` and `sub_status` are restricted to specific values
106
+ - `priority` can only be Low, Medium, or High
107
+
108
+ ### Data Update Frequency/Volatility:
109
+ - Tickets are continuously created and updated
110
+ - Status and sub_status are frequently updated
111
+
112
+ ### Important Notes/Considerations for Querying:
113
+ - Status and sub_status should be evaluated together
114
+ - `created_at` is a timestamp; account for time zones when filtering or grouping tickets by date
115
+
116
+ ---
117
+
118
+ ## Table: `ticket_comment`
119
+
120
+ ### Schema: `public`
121
+
122
+ ### All Columns and their Descriptions:
123
+ - `comment_id` (integer): Unique identifier for the comment. Primary key.
124
+ - `ticket_id` (text): Related ticket ID. Foreign key to ticket table.
125
+ - `created_at` (timestamp): Date and time when the comment was created.
126
+ - `user_type` (text): Type of user making the comment (customer, agent).
127
+ - `channel` (text): Comment channel (email, chat, portal).
128
+ - `comment_text` (text): Comment text.
129
+
130
+ ### Relationships with Other Tables (Foreign Keys):
131
+ - `ticket_id` (FK) references `ticket` table
132
+
133
+ ### Cardinality of Relationships:
134
+ - Many comments can belong to one ticket (many-to-one with `ticket`)
135
+
136
+ ### Common Use Cases/Example Queries:
137
+ - View all comments for a specific ticket
138
+ - Analysis of comments by channel
139
+ - Distribution of comments by user type
140
+ - Example SQL Snippet: `SELECT * FROM ticket_comment WHERE ticket_id = 'W-137014' ORDER BY created_at DESC;`
141
+
142
+ ### Data Constraints and Business Rules:
143
+ - `comment_id` must be unique and is the primary key
144
+ - `user_type` can only be 'customer' or 'agent'
145
+ - `channel` can only be 'email', 'chat', or 'portal'
146
+
147
+ ### Data Update Frequency/Volatility:
148
+ - Comments are continuously added
149
+ - Existing comments are rarely updated or deleted
150
+
151
+ ### Important Notes/Considerations for Querying:
152
+ - Order comments by `created_at` so that a ticket's conversation is read in sequence
153
+ - When joining to `ticket`, check that each comment's `ticket_id` matches an existing ticket