Spaces:

petrified
/

simple_visual

Sleeping

File size: 13,941 Bytes

a1559c3
 
 
 
 
 
 
 
 
 
 
 
 
 
912de1a
a1559c3
 
 
 
316cdf2
8c4d609
 
08fa7d0
a86480f
76e6bd8
070c079
a1559c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
070c079
 
08fa7d0
070c079
 
 
a1559c3
 
070c079
 
 
 
a1559c3
070c079
 
 
a1559c3
070c079
 
 
 
a1559c3
070c079
 
 
a1559c3
070c079
 
 
 
 
 
a1559c3
070c079
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1559c3
070c079
08fa7d0
070c079
 
8135229
08fa7d0
 
 
 
 
 
 
a1559c3
 
 
 
 
070c079
a1559c3
 
 
 
 
 
 
 
 
cccdaee
46b8b79
cccdaee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1559c3
cccdaee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46b8b79
 
 
58a6bd3
615d9d6
88a5675
46b8b79
b49d0be
58a6bd3
584115f
 
58a6bd3
46b8b79
 
 
 
cccdaee
 
 
 
58a6bd3
cccdaee
 
 
 
 
 
 
 
 
46b8b79
cccdaee
 
 
 
 
 
1dd98da
cccdaee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58a6bd3
 
cccdaee
 
 
 
 
 
 
 
 
58a6bd3
cccdaee
 
 
 
 
 
 
 
a1559c3
 
cccdaee
 
 
 
 
 
 
 
 
 
a1559c3
 
 
 
 
 
cccdaee
a1559c3
 
5fafb73
070589c
d1b19e6
81a7385
358eff7
cccdaee
070589c
a1559c3
8329297
5fafb73
 
070589c
439fb3c
a1559c3
46b8b79

import gradio as gr
from sqlalchemy import create_engine
import pandas as pd
import openai
import os
from lida import Manager, TextGenerationConfig, llm
from llmx.generators.text.openai_textgen import OpenAITextGenerator
from langchain_openai import AzureChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import pandas as pd
import base64
import numpy as np
import matplotlib.image as mpimg
from PIL import Image
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
import base64
from utils.azure_blob import AzureBlob
from langchain.output_parsers import CommaSeparatedListOutputParser

from pprint import pprint

# Blob-storage client built from the connection string held in the
# `azure_blob_conn` environment variable. `ab` is a short alias for it and is
# the name choose_table() uses to locate the latest parquet extracts.
azure_blob = AzureBlob(os.getenv("azure_blob_conn"))
ab = azure_blob

# Expose the OpenAI credentials under the AZURE_* environment variable names.
# NOTE(review): assigning None into os.environ raises TypeError, so importing
# this module fails when OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT is unset --
# confirm the deployment always provides both.
os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
# This re-assigns the variable to its own current value (no-op when it is set).
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv("AZURE_OPENAI_ENDPOINT")
# Database connection settings read from the environment; no live code in the
# visible part of this file reads them.
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')

# Module-level Azure chat model bound to the "CapSuiteGPT4omini" deployment,
# using the API version exported just above.
# NOTE(review): presumably the client picks up the key and endpoint from the
# AZURE_* environment variables set above -- confirm against langchain_openai.
model = AzureChatOpenAI(
    deployment_name="CapSuiteGPT4omini",
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

def choose_table(question):
    """Load the latest sale and membership extracts and return one flat sales table.

    The two parquet files are located through the module-level blob client
    ``ab``, left-joined on ``member_id``, enriched with a computed
    ``sales_amount`` column, renamed to analysis-friendly column names and
    reduced to a fixed set of columns.

    Args:
        question: The user's question. It is not used by the current
            implementation (the same table is returned for every question);
            the parameter is kept so existing callers keep working.

    Returns:
        pandas.DataFrame: A fresh frame with the columns ``trxn_item_id``,
        ``trxn_id``, ``sales_amount``, ``unit_price``, ``sales_qty``,
        ``trxn_item_discount_amt``, ``trxn_date``, ``trxn_channel``,
        ``staff_name``, ``customer_name``, ``prod_category``, ``prod_type``,
        ``prod_name``, ``capsuite_ref``, ``gender``, ``age`` and
        ``trxn_month``. ``trxn_date`` and ``trxn_month`` are strings.

    Raises:
        Exception: Any error raised while loading or transforming the data is
            printed and then re-raised. The previous implementation returned
            from a ``finally`` clause, which silently discarded the error and
            handed back a half-transformed frame (or failed with an unrelated
            ``UnboundLocalError`` when loading itself had failed).
    """
    str_client_name = 'foodBeverageSample1'
    output_columns = [
        'trxn_item_id', 'trxn_id', 'sales_amount', 'unit_price', 'sales_qty',
        'trxn_item_discount_amt', 'trxn_date', 'trxn_channel', 'staff_name',
        'customer_name', 'prod_category', 'prod_type', 'prod_name',
        'capsuite_ref', 'gender', 'age', 'trxn_month',
    ]

    try:
        df_sales = pd.read_parquet(ab.get_latest_parquet('landing', str_client_name, 'sale', 'sol_'))
        df_members = pd.read_parquet(ab.get_latest_parquet('landing', str_client_name, 'membership', 'mem_'))

        # Left join keeps every sale line even when no membership row matches.
        # Columns present in both inputs receive the suffixes below, which is
        # why the rename step refers to the suffixed names.
        df_data = pd.merge(
            df_sales,
            df_members,
            on='member_id',
            how='left',
            suffixes=('_sale_order_line', '_membership'),
        )

        # Line amount = unit price * quantity; cast first so values that were
        # stored as text still multiply numerically.
        df_data['sales_amount'] = (
            df_data['trxn_item_target_curr_unit_price'].astype(float)
            * df_data['trxn_item_qty'].astype(float)
        )

        df_data = df_data.rename(columns={
            'trxn_item_target_curr_unit_price': 'unit_price',
            'display_name_membership': 'customer_name',
            'capsuite_ref_sale_order_line': 'capsuite_ref',
            'trxn_item_qty': 'sales_qty',
        })

        # Reduce timestamps to calendar dates, derive the month period from
        # those dates, then store both as plain strings.
        trxn_dates = pd.to_datetime(df_data['trxn_date']).dt.date
        df_data['trxn_month'] = pd.to_datetime(trxn_dates).dt.to_period('M').astype(str)
        df_data['trxn_date'] = trxn_dates.astype(str)

        # .copy() hands the caller an independent frame, so later in-place
        # edits cannot trigger chained-assignment warnings.
        return df_data[output_columns].copy()
    except Exception as e:
        print(f"Error while: {e}")
        raise


# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 text string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 so the value can be embedded directly in text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')


def random_response(message):
    """Answer a sales question with a generated chart and an LLM commentary on it.

    Steps, all inside one guarded attempt:

    1. Load the sales table with ``choose_table`` and normalise it (missing
       values become ``''``, object columns become ``str``).
    2. Summarise the table with LIDA and log one LIDA goal.
    3. Ask the chat model to restate the user's question as a single
       visualisation goal, using the data summary as context.
    4. Let LIDA render that goal with matplotlib and save it to
       ``chart_1.png``.
    5. Send the saved chart back to the chat model and ask for business
       insights.

    Args:
        message: The user's question, as typed into the chat box.

    Returns:
        tuple: ``(text, image)``. On success ``text`` is the model's reply and
        ``image`` is the chart as loaded by ``matplotlib.image.imread``. When
        every attempt fails the result is
        ``("An error occurred after multiple attempts.", None)``.
    """
    max_attempts = 1  # Set the maximum number of attempts
    # NOTE(review): fixed file name -- concurrent requests would overwrite each
    # other's chart; confirm the app is only used by one user at a time.
    image_path = "chart_1.png"

    for attempt in range(1, max_attempts + 1):
        try:
            question = message
            df_data = choose_table(message)

            # Fill missing values with empty strings, then make sure every
            # object column holds plain strings.
            df_data = df_data.fillna('')
            for col in df_data.columns:
                if df_data[col].dtype == 'object':
                    df_data[col] = df_data[col].astype(str)

            text_gen = OpenAITextGenerator(
                provider='openai',
                api_type='azure',
                azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
                api_key=os.getenv('OPENAI_API_KEY'),
                api_version='2023-05-15',
            )
            lida = Manager(text_gen=text_gen)
            text_gen_config = TextGenerationConfig(
                n=1,
                model='CapSuiteGPT35T16K',
                temperature=0,
            )

            summary = lida.summarize(df_data)
            print(f'*'*50)
            pprint(f"{summary}")
            str_summary = str(summary)

            print(f'*'*50)
            time_now = pd.Timestamp.now()
            print(f'Datetime now:{time_now}')
            # The LIDA goal is only logged; the goal actually used for the
            # chart comes from the chat-model prompt below.
            goals = lida.goals(summary, n=1, textgen_config=text_gen_config,persona=f'An data analyst of the company who want to know {question}')
            print(f'goals: {goals[0]}')

            output_parser = CommaSeparatedListOutputParser()
            # Local model with temperature=0; named chat_model so it does not
            # shadow the module-level `model` or the imported `llm`.
            chat_model = AzureChatOpenAI(
                deployment_name="CapSuiteGPT4omini",
                openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
                temperature=0
            )

            prompt = ChatPromptTemplate.from_template("Based on the data below:{str_summary},"
                                                    "The data is sales order line is every transaction of the company."
                                                    "Base on the question:{question}, regenerate the output"
                                                    "If the original question is not metion time related varibles,do not add it."
                                                    "For example: 'Goal(question='What are the sales trends by product category?visualization='bar chart of prod_category against sum(trxn_item_qty) grouped by trxn_date'. and"
                                                    "'Goal(question='Who are the top customers based on transaction count?', visualization='Bar chart of customer_name vs. count(trxn_id)')"
                                                    "If top in your output Goal question, default it to 10."
                                                    "The visualization should align with the question and the data."
                                                    "Usually, when deal with:age, show all the data."
                                                    "But for other datas beside age: customer,prouct,sales,qty,etc show top 10."
                                                    "Process the top data at last when put in the graph."
                                                    "When ask customers,customer, it means customer_name."
                                                    "When ask product, it means prod_name."
                                                    "When ask category, it means prod_category."
                                                    "etc, find the right column name exsiting in the data."
                                                    "If the data columns is empty, please ignore the column."
                                                    "Only output 1 question.")

            # The prompt consumes the input dict directly. The earlier
            # {"str_summary": RunnablePassthrough(), "question": RunnablePassthrough()}
            # mapping forwarded the *whole* input dict to each key, so both
            # template variables were rendered as the repr of that dict.
            chain = prompt | chat_model | output_parser

            insights = chain.invoke({"str_summary": str_summary, "question": question})
            print(f'*'*50)
            print(f'insights: {insights}')

            # LIDA accepts: 'matplotlib', 'seaborn', 'plotly', 'bokeh',
            # 'ggplot', 'altair'.
            # Errors here now propagate to the attempt handler instead of being
            # printed and followed by a failure on an undefined chart variable.
            charts = lida.visualize(summary=summary, goal=str(insights)+"Graph heigh 800,width 1000.Set different colors to different varibles.x label rotate 60 degree,do not use the guide line", textgen_config=text_gen_config,library='matplotlib')
            if not charts:
                raise RuntimeError("LIDA returned no chart for the requested goal")
            chart = charts[0]
            print(f'*'*50)
            print(f"{chart.code}")

            chart.savefig(image_path)
            print(f'*'*50)
            print(f"Chart  saved")

            img = mpimg.imread(image_path)
            print(f'*'*50)
            print(f"Image opened")

            base64_image = encode_image(image_path)

            response = chat_model.invoke(
                [
                    HumanMessage(
                        content=[
                            {"type": "text", "text": f"Give me some business insights base on the graph, contain exact number conclusion."},
                            {
                                "type": "image_url",
                                "image_url": {
                                    # The chart is saved as PNG, so declare PNG
                                    # (was mislabelled as image/jpeg).
                                    "url": f"data:image/png;base64,{base64_image}"
                                },
                            },
                        ]
                    )
                ]
            )
            final_result_str = response.content

            return final_result_str, img
        except Exception as e:
            print(f"Attempt {attempt} failed with error: {e}")

    return "An error occurred after multiple attempts.", None  # Return an error message




# Two-column page: the generated chart on the left, the chat controls on the
# right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            temp_img = gr.Image(height=800)
        with gr.Column():
            chat_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Chat",
            )
            # Sample questions offered as click-to-fill examples for the chat box.
            example_prompts = [
                'Top 10 prod_cate sales',
                'Top product in category Seafood',
                'Total sales amount by product category each day using line chart',
                'What are the top selling at product level??',
                'Sales amount distribution by age',
                'Sales amount distribution by gender',
                'Top customer by sales amount',
            ]
            examples = gr.Examples(examples=example_prompts, inputs=chat_input)
            chat_output = gr.Textbox(label="Response", interactive=False)
            # Button label is Chinese for "Generate response".
            submit_button = gr.Button("生成响应")
            # random_response returns (text, image); map them onto the response
            # box and the chart panel in that order.
            submit_button.click(
                fn=random_response,
                inputs=chat_input,
                outputs=[chat_output, temp_img],
            )


demo.launch()