# Source listing captured from a Hugging Face Space file view (file size: 13,941 bytes).
# The site chrome, commit-hash gutter and line-number gutter that were fused into the
# listing are not part of the program and have been dropped.
import gradio as gr
from sqlalchemy import create_engine
import pandas as pd
import openai
import os
from lida import Manager, TextGenerationConfig, llm
from llmx.generators.text.openai_textgen import OpenAITextGenerator
from langchain_openai import AzureChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import pandas as pd
import base64
import numpy as np
import matplotlib.image as mpimg
from PIL import Image
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
import base64
from utils.azure_blob import AzureBlob
from langchain.output_parsers import CommaSeparatedListOutputParser
from pprint import pprint
# Blob-storage client used to locate the parquet extracts; `ab` is a short alias for it.
azure_blob = AzureBlob(os.getenv("azure_blob_conn"))
ab = azure_blob

# Expose the deployment's OPENAI_API_KEY under the Azure-specific variable name.
# os.environ only accepts str values, so an unset source variable is skipped:
# assigning None would raise TypeError while the module is being imported.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["AZURE_OPENAI_API_KEY"] = _api_key
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
# AZURE_OPENAI_ENDPOINT is read straight from the environment wherever it is needed;
# copying it onto itself is a no-op when set and a TypeError when unset, so no
# assignment is made here.

# Database settings: only referenced by the disabled SQL code path.
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')

# Module-level chat model bound to the "CapSuiteGPT4omini" deployment.
model = AzureChatOpenAI(
    deployment_name="CapSuiteGPT4omini",
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)
def choose_table(question, str_client_name='foodBeverageSample1'):
    """Load the latest sale and membership extracts and return one flat sales table.

    The two parquet files are left-joined on ``member_id``, a ``sales_amount``
    column (unit price * quantity) is derived, a few columns are renamed to
    friendlier names, and ``trxn_date`` / ``trxn_month`` are normalised to strings.

    Args:
        question: The user's question. Currently unused: the earlier approach that
            asked the chat model to pick a SQL table from the question is disabled,
            and the data is always read from blob storage.
        str_client_name: Client identifier passed to ``ab.get_latest_parquet``.
            Defaults to the sample client the function was previously hard-coded to.

    Returns:
        A DataFrame restricted to the columns: trxn_item_id, trxn_id, sales_amount,
        unit_price, sales_qty, trxn_item_discount_amt, trxn_date, trxn_channel,
        staff_name, customer_name, prod_category, prod_type, prod_name,
        capsuite_ref, gender, age, trxn_month.

    Raises:
        Exception: Any failure while loading or transforming the data is logged
            and re-raised. Previously a ``return`` inside ``finally`` swallowed the
            error and either raised UnboundLocalError or handed back a partially
            transformed frame.
    """
    try:
        df_sales = pd.read_parquet(
            ab.get_latest_parquet('landing', str_client_name, 'sale', 'sol_')
        )
        df_members = pd.read_parquet(
            ab.get_latest_parquet('landing', str_client_name, 'membership', 'mem_')
        )

        # Left join keeps every sale line even when the member record is missing.
        # Columns present on both sides get the suffixes below.
        df_data = pd.merge(
            df_sales,
            df_members,
            on='member_id',
            how='left',
            suffixes=('_sale_order_line', '_membership'),
        )

        # Derive the line amount before the source columns are renamed.
        df_data['sales_amount'] = (
            df_data['trxn_item_target_curr_unit_price'].astype(float)
            * df_data['trxn_item_qty'].astype(float)
        )
        df_data = df_data.rename(columns={
            'trxn_item_target_curr_unit_price': 'unit_price',
            'display_name_membership': 'customer_name',
            'capsuite_ref_sale_order_line': 'capsuite_ref',
            'trxn_item_qty': 'sales_qty',
        })

        # Reduce timestamps to calendar dates, derive the month period from those
        # dates, then store both as plain strings.
        df_data['trxn_date'] = pd.to_datetime(df_data['trxn_date']).dt.date
        df_data['trxn_month'] = pd.to_datetime(df_data['trxn_date']).dt.to_period('M')
        df_data['trxn_date'] = df_data['trxn_date'].astype(str)
        df_data['trxn_month'] = df_data['trxn_month'].astype(str)

        return df_data[[
            'trxn_item_id', 'trxn_id', 'sales_amount', 'unit_price', 'sales_qty',
            'trxn_item_discount_amt', 'trxn_date', 'trxn_channel', 'staff_name',
            'customer_name', 'prod_category', 'prod_type', 'prod_name',
            'capsuite_ref', 'gender', 'age', 'trxn_month',
        ]]
    except Exception as e:
        print(f"Error while: {e}")
        raise
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
def random_response(message):
    """Answer a sales question with a generated chart and a commentary on it.

    Steps, in order: load the sales table with ``choose_table``; summarise it with
    LIDA; ask the chat model to restate the question as a single visualisation
    goal; have LIDA render that goal with matplotlib; save the chart to
    ``chart_1.png``; send the image to the chat model and return its commentary.

    Args:
        message: The user's question.

    Returns:
        A ``(text, image)`` tuple: the chat model's reply and the chart as loaded
        by ``matplotlib.image.imread``. If every attempt fails, the tuple is
        ``("An error occurred after multiple attempts.", None)``.
    """
    max_attempts = 1  # Set the maximum number of attempts
    image_path = "chart_1.png"
    separator = '*' * 50
    question = message

    for attempt in range(1, max_attempts + 1):
        try:
            df_data = choose_table(message)
            # fill na with empty string
            df_data = df_data.fillna('')
            # loop columns, if column is object type, convert to string
            for col in df_data.columns:
                if df_data[col].dtype == 'object':
                    df_data[col] = df_data[col].astype(str)

            text_gen = OpenAITextGenerator(
                provider='openai',
                api_type='azure',
                azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
                api_key=os.getenv('OPENAI_API_KEY'),
                api_version='2023-05-15',
            )
            lida_manager = Manager(text_gen=text_gen)
            text_gen_config = TextGenerationConfig(
                n=1,
                model='CapSuiteGPT35T16K',
                temperature=0,
            )

            summary = lida_manager.summarize(df_data)
            str_summary = str(summary)
            print(separator)
            pprint(f"{summary}")
            print(separator)
            time_now = pd.Timestamp.now()
            print(f'Datetime now:{time_now}')

            # The LIDA goal is only logged; the goal actually charted is produced
            # by the chat-model prompt below.
            goals = lida_manager.goals(
                summary,
                n=1,
                textgen_config=text_gen_config,
                persona=f'An data analyst of the company who want to know {question}',
            )
            print(f'goals: {goals[0]}')

            output_parser = CommaSeparatedListOutputParser()
            # "Bussiness insights focus on different aspects of the data, such as sales amount,sales qty, product category, time, etc."
            chat_model = AzureChatOpenAI(
                deployment_name="CapSuiteGPT4omini",
                openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
                temperature=0,
            )
            prompt = ChatPromptTemplate.from_template(
                "Based on the data below:{str_summary},"
                # "please give me the most related and useful possible question to get simple but useful insights for {question}."
                "The data is sales order line is every transaction of the company."
                "Base on the question:{question}, regenerate the output"
                # "Your output will be used to guide the graph generation by python using ploty, so make it simple and easier to process data."
                "If the original question is not metion time related varibles,do not add it."
                "For example: 'Goal(question='What are the sales trends by product category?visualization='bar chart of prod_category against sum(trxn_item_qty) grouped by trxn_date'. and"
                "'Goal(question='Who are the top customers based on transaction count?', visualization='Bar chart of customer_name vs. count(trxn_id)')"
                "If top in your output Goal question, default it to 10."
                "The visualization should align with the question and the data."
                "Usually, when deal with:age, show all the data."
                "But for other datas beside age: customer,prouct,sales,qty,etc show top 10."
                "Process the top data at last when put in the graph."
                "When ask customers,customer, it means customer_name."
                "When ask product, it means prod_name."
                "When ask category, it means prod_category."
                "etc, find the right column name exsiting in the data."
                "If the data columns is empty, please ignore the column."
                "Only output 1 question."
            )
            # The prompt is fed the input dict directly. Routing it through a
            # {"str_summary": RunnablePassthrough(), "question": RunnablePassthrough()}
            # mapping would hand the *whole* dict to each template variable, so the
            # summary would be rendered twice and the question as a dict.
            chain = prompt | chat_model | output_parser
            insights = chain.invoke({"str_summary": str_summary, "question": question})
            print(separator)
            print(f'insights: {insights}')

            # ValueError: Unsupported library. Choose from 'matplotlib', 'seaborn', 'plotly', 'bokeh', 'ggplot', 'altair'.
            charts = lida_manager.visualize(
                summary=summary,
                goal=str(insights) + "Graph heigh 800,width 1000.Set different colors to different varibles.x label rotate 60 degree,do not use the guide line",
                textgen_config=text_gen_config,
                library='matplotlib',
            )
            # Fail with a clear message instead of logging and then tripping over
            # an unbound/empty result further down.
            if not charts:
                raise RuntimeError("LIDA returned no chart for the generated goal.")
            chart = charts[0]
            print(separator)
            print(f"{chart.code}")

            chart.savefig(image_path)
            print(separator)
            print("Chart saved")

            img = mpimg.imread(image_path)
            print(separator)
            print("Image opened")

            base64_image = encode_image(image_path)
            response = chat_model.invoke(
                [
                    HumanMessage(
                        content=[
                            {"type": "text", "text": "Give me some business insights base on the graph, contain exact number conclusion."},
                            {
                                "type": "image_url",
                                "image_url": {
                                    # The chart is saved as a .png file, so label it as PNG.
                                    "url": f"data:image/png;base64,{base64_image}"
                                },
                            },
                        ]
                    )
                ]
            )
            return response.content, img
        except Exception as e:
            print(f"Attempt {attempt} failed with error: {e}")

    # Every attempt failed: return a value for each of the two UI outputs.
    return "An error occurred after multiple attempts.", None
# Gradio UI: chart on one side, question box / examples / response on the other.
# NOTE(review): the nesting below is reconstructed because the source listing had
# lost its indentation; confirm the intended layout (two columns in one row).
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            temp_img = gr.Image(
                height=800
            )
        with gr.Column():
            chat_input = gr.Textbox(placeholder="Type your message here...", label="Chat")
            # Clicking an example fills the chat input box.
            examples = gr.Examples(
                examples=[
                    'Top 10 prod_cate sales',
                    'Top product in category Seafood',
                    'Total sales amount by product category each day using line chart',
                    'What are the top selling at product level??',
                    'Sales amount distribution by age',
                    'Sales amount distribution by gender',
                    'Top customer by sales amount',
                ],
                inputs=chat_input
            )
            chat_output = gr.Textbox(label="Response", interactive=False)
            submit_button = gr.Button("生成响应")
    # random_response returns (text, image), matching the two outputs listed here.
    submit_button.click(fn=random_response, inputs=chat_input, outputs=[chat_output, temp_img])

# The stray trailing "|" after this call in the listing was a syntax error and is removed.
demo.launch()