"""Gradio demo: answer a sales question with a generated chart and LLM commentary.

Flow: load the sale / membership parquet extracts, let LIDA summarise the data
and draw a chart for the question, then ask the Azure chat model to comment on
the rendered image.
"""
import base64
import os
from pprint import pprint

import gradio as gr
import matplotlib.image as mpimg
import numpy as np
import openai
import pandas as pd
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI
from langchain_openai import ChatOpenAI
from lida import Manager, TextGenerationConfig, llm
from llmx.generators.text.openai_textgen import OpenAITextGenerator
from PIL import Image
from sqlalchemy import create_engine

from utils.azure_blob import AzureBlob

azure_blob = AzureBlob(os.getenv("azure_blob_conn"))
ab = azure_blob

# NOTE: assigning None into os.environ raises TypeError, so OPENAI_API_KEY and
# AZURE_OPENAI_ENDPOINT must be set before this module is imported.
os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv("AZURE_OPENAI_ENDPOINT")

db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')

model = AzureChatOpenAI(
    deployment_name="CapSuiteGPT4omini",
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

# Columns kept (in this order) in the frame handed to LIDA.
_OUTPUT_COLUMNS = [
    'trxn_item_id', 'trxn_id', 'sales_amount', 'unit_price', 'sales_qty',
    'trxn_item_discount_amt', 'trxn_date', 'trxn_channel', 'staff_name',
    'customer_name', 'prod_category', 'prod_type', 'prod_name', 'capsuite_ref',
    'gender', 'age', 'trxn_month',
]

# File the generated chart is written to and read back from.
_CHART_PATH = "chart_1.png"


def choose_table(question):
    """Load the sale-order-line data joined with membership data.

    Args:
        question: The user's question. Currently unused: an earlier revision
            asked the LLM to generate a ``select * from <table>`` query against
            Postgres from it; the data now always comes from the parquet
            extracts below.

    Returns:
        A ``pandas.DataFrame``. When reshaping succeeds it holds exactly the
        columns in ``_OUTPUT_COLUMNS``; when a reshaping step fails the error
        is printed and the frame is returned as far as it got (best effort).

    Raises:
        Whatever ``ab.get_latest_parquet`` / ``pd.read_parquet`` raise when the
        extracts cannot be loaded. (Previously a ``return`` inside ``finally``
        replaced that error with an ``UnboundLocalError`` on ``df_data``.)
    """
    str_client_name = 'foodBeverageSample1'
    df_data = pd.read_parquet(ab.get_latest_parquet('landing', str_client_name, 'sale', 'sol_'))
    df_data2 = pd.read_parquet(ab.get_latest_parquet('landing', str_client_name, 'membership', 'mem_'))

    try:
        df_data = pd.merge(
            df_data,
            df_data2,
            on='member_id',
            how='left',
            suffixes=('_sale_order_line', '_membership'),
        )
        df_data['sales_amount'] = (
            df_data['trxn_item_target_curr_unit_price'].astype(float)
            * df_data['trxn_item_qty'].astype(float)
        )
        df_data.rename(
            columns={
                'trxn_item_target_curr_unit_price': 'unit_price',
                'display_name_membership': 'customer_name',
                'capsuite_ref_sale_order_line': 'capsuite_ref',
                'trxn_item_qty': 'sales_qty',
            },
            inplace=True,
        )
        # Normalise to a calendar date first, derive the month from it, then
        # store both as plain strings.
        df_data['trxn_date'] = pd.to_datetime(df_data['trxn_date']).dt.date
        df_data['trxn_month'] = pd.to_datetime(df_data['trxn_date']).dt.to_period('M')
        df_data['trxn_date'] = df_data['trxn_date'].astype(str)
        df_data['trxn_month'] = df_data['trxn_month'].astype(str)
        df_data = df_data[_OUTPUT_COLUMNS]
    except Exception as e:
        # Deliberate best effort: report and return what we have so far.
        print(f"Error while: {e}")
    return df_data


# Function to encode the image
def encode_image(image_path):
    """Return the content of the file at ``image_path`` as a base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def _build_goal_prompt():
    """Build the prompt that turns the question into one visualisation goal."""
    return ChatPromptTemplate.from_template(
        "Based on the data below:{str_summary},"
        "The data is sales order line is every transaction of the company."
        "Base on the question:{question}, regenerate the output"
        "If the original question is not metion time related varibles,do not add it."
        "For example: 'Goal(question='What are the sales trends by product category?visualization='bar chart of prod_category against sum(trxn_item_qty) grouped by trxn_date'. and"
        "'Goal(question='Who are the top customers based on transaction count?', visualization='Bar chart of customer_name vs. count(trxn_id)')"
        "If top in your output Goal question, default it to 10."
        "The visualization should align with the question and the data."
        "Usually, when deal with:age, show all the data."
        "But for other datas beside age: customer,prouct,sales,qty,etc show top 10."
        "Process the top data at last when put in the graph."
        "When ask customers,customer, it means customer_name."
        "When ask product, it means prod_name."
        "When ask category, it means prod_category."
        "etc, find the right column name exsiting in the data."
        "If the data columns is empty, please ignore the column."
        "Only output 1 question."
        ""
    )


def _render_chart(lida, summary, insights, text_gen_config):
    """Ask LIDA for a matplotlib chart and return the first result.

    Raises the underlying error when visualisation fails, and ``RuntimeError``
    when LIDA returns no chart, instead of failing later on an unbound or
    empty result.
    """
    # LIDA rejects unknown libraries: "Unsupported library. Choose from
    # 'matplotlib', 'seaborn', 'plotly', 'bokeh', 'ggplot', 'altair'."
    try:
        charts = lida.visualize(
            summary=summary,
            goal=str(insights) + "Graph heigh 800,width 1000.Set different colors to different varibles.x label rotate 60 degree,do not use the guide line",
            textgen_config=text_gen_config,
            library='matplotlib',
        )
    except Exception as e:
        print(f"Error while: {e}")
        raise
    if not charts:
        raise RuntimeError("lida.visualize returned no chart for the generated goal")
    print('*' * 50)
    print(f"{charts[0].code}")
    return charts[0]


def random_response(message):
    """Answer ``message`` with LLM commentary and the chart it is based on.

    Args:
        message: The user's question (text from the chat box).

    Returns:
        ``(text, image)``: the chat model's commentary and the chart as read
        by ``matplotlib.image.imread``; on failure
        ``("An error occurred after multiple attempts.", None)``.
    """
    max_attempts = 1  # Set the maximum number of attempts
    attempts = 0
    while attempts < max_attempts:
        try:
            df_data = choose_table(message)
            question = message

            # fill na with empty string
            df_data.fillna('', inplace=True)
            # Object columns are cast to str before the frame goes to LIDA.
            for col in df_data.columns:
                if df_data[col].dtype == 'object':
                    df_data[col] = df_data[col].astype(str)

            text_gen = OpenAITextGenerator(
                provider='openai',
                api_type='azure',
                azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
                api_key=os.getenv('OPENAI_API_KEY'),
                api_version='2023-05-15',
            )
            lida = Manager(text_gen=text_gen)
            text_gen_config = TextGenerationConfig(
                n=1,
                model='CapSuiteGPT35T16K',
                temperature=0,
            )

            summary = lida.summarize(df_data)
            print('*' * 50)
            pprint(f"{summary}")
            str_summary = str(summary)
            print('*' * 50)
            time_now = pd.Timestamp.now()
            print(f'Datetime now:{time_now}')

            goals = lida.goals(
                summary,
                n=1,
                textgen_config=text_gen_config,
                persona=f'An data analyst of the company who want to know {question}',
            )
            print(f'goals: {goals[0]}')

            output_parser = CommaSeparatedListOutputParser()
            chat_model = AzureChatOpenAI(
                deployment_name="CapSuiteGPT4omini",
                openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
                temperature=0,
            )
            # The input dict feeds the prompt directly. The former
            # {"str_summary": RunnablePassthrough(), "question": RunnablePassthrough()}
            # prefix passed the *whole* dict to both template variables.
            chain = _build_goal_prompt() | chat_model | output_parser
            insights = chain.invoke({"str_summary": str_summary, "question": question})
            print('*' * 50)
            print(f'insights: {insights}')

            chart = _render_chart(lida, summary, insights, text_gen_config)
            chart.savefig(_CHART_PATH)
            print('*' * 50)
            print(f"Chart saved")

            img = mpimg.imread(_CHART_PATH)
            print('*' * 50)
            print(f"Image opened")

            base64_image = encode_image(_CHART_PATH)
            response = chat_model.invoke(
                [
                    HumanMessage(
                        content=[
                            {"type": "text", "text": f"Give me some business insights base on the graph, contain exact number conclusion."},
                            {
                                "type": "image_url",
                                # The chart is a PNG file, so declare it as one.
                                "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                            },
                        ]
                    )
                ]
            )
            final_result_str = response.content
            return final_result_str, img
        except Exception as e:
            attempts += 1
            print(f"Attempt {attempts} failed with error: {e}")
            if attempts >= max_attempts:
                return "An error occurred after multiple attempts.", None  # Return an error message


# NOTE(review): the nesting below was reconstructed from a flattened file;
# confirm which widgets belong to which column.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            temp_img = gr.Image(
                height=800
            )
        with gr.Column():
            chat_input = gr.Textbox(placeholder="Type your message here...", label="Chat")
            examples = gr.Examples(
                examples=[
                    'Top 10 prod_cate sales',
                    'Top product in category Seafood',
                    'Total sales amount by product category each day using line chart',
                    'What are the top selling at product level??',
                    'Sales amount distribution by age',
                    'Sales amount distribution by gender',
                    'Top customer by sales amount',
                ],
                inputs=chat_input
            )
            chat_output = gr.Textbox(label="Response", interactive=False)
            # Label restored from mis-decoded UTF-8; it reads "Generate response".
            submit_button = gr.Button("生成响应")
            submit_button.click(fn=random_response, inputs=chat_input, outputs=[chat_output, temp_img])

demo.launch()