| | ''' |
| | 使用OpenAI的官方Assistant,实现code_interpreter功能: |
| | 1. 使用OpenAI官方的API教程: |
| | https://cookbook.openai.com/examples/assistants_api_overview_python (重要) |
| | https://platform.openai.com/docs/assistants/overview |
| | 2. YT视频: https://www.youtube.com/watch?v=5rcjGjgJNQc |
| | 3. 基础代码参考:openai_interpreter.py |
| | 4. 函数化text_response和image_response和prompt。 |
| | 5. 中文字体显示还是有问题,现在妥协的方式是转换成英文来显示。不影响分析过程和结果。 |
| | |
| | ##TEMP |
| | |
| | import matplotlib |
| | import seaborn as sns |
| | SimHei = FontProperties(fname='SimHei.ttf') |
| | matplotlib.rcParams['font.sans-serif'] = ['SimHei'] |
| | sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='dark', font='SimHei') |
| | fontproperties = SimHei |
| | |
| | 每次输出图表时,你都需要在matplotlib和seaborn中安装中文字体'SimHei',安装的路径在:'/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/SimHei.ttf'。然后,你需要在matplotlib和seaborn中设置字体为'SimHei'。 |
| | |
| | ''' |
| | |
| |
|
| | |
| | import csv |
| | import os |
| | import pathlib |
| | import re |
| | import tempfile |
| | import time |
| | import warnings |
| | from itertools import product |
| | from pathlib import Path |
| | from re import sub |
| | from tempfile import NamedTemporaryFile |
| | from time import sleep |
| |
|
| | import matplotlib.pyplot as plt |
| | import numpy as np |
| | import openai |
| | import pandas as pd |
| | import requests |
| | import seaborn as sns |
| | from dotenv import load_dotenv |
| | from matplotlib.pyplot import style |
| | from rich import print |
| | from tqdm import tqdm, tqdm_notebook, trange |
| | import chatgpt |
| |
|
| | import streamlit as st |
| | import add_fonts |
| |
|
# Silence every warning category and apply seaborn's default theme.
warnings.filterwarnings('ignore')
sns.set()

# Load variables from a .env file into the process environment; this must run
# before the `user_token` lookup below.
load_dotenv()

# The key is read from the `user_token` environment variable and handed to
# both the OPENAI_API_KEY environment variable and the legacy module-level
# `openai.api_key` setting. A missing `user_token` raises KeyError here.
os.environ["OPENAI_API_KEY"] = openai.api_key = os.environ['user_token']

# Module-level client, created only after OPENAI_API_KEY has been set above.
from openai import Client
client = Client()
| |
|
| | |
| | import json |
def show_json(name=None, obj=None):
    """Print ``obj`` as a parsed-JSON structure under a label, then a divider.

    Args:
        name: Label printed before the payload.
        obj: Object exposing ``model_dump_json()`` that returns a JSON string.
    """
    # Round-trip through JSON so the output is plain dicts/lists rather than
    # the object's own repr.
    payload = json.loads(obj.model_dump_json())
    print(name, ":", payload)
    print("--" * 40)
| |
|
def save_json(obj):
    """Write ``obj`` to ``message.json`` as indented, human-readable JSON.

    Args:
        obj: Object exposing ``model_dump_json()`` that returns a JSON string.

    The file is created (or overwritten) in the current working directory.
    """
    # model_dump_json() already returns a JSON *string*. Dumping that string
    # directly would double-encode it into one quoted, escaped literal and
    # make ``indent`` a no-op, so parse it back into Python objects first.
    payload = json.loads(obj.model_dump_json())
    json_message = json.dumps(payload, indent=4, ensure_ascii=False)
    # Explicit UTF-8 so non-ASCII text is written the same on every platform.
    with open('message.json', 'w', encoding='utf-8') as file:
        file.write(json_message)
| |
|
| | |
# System instructions sent verbatim to the assistant.
_ASSISTANT_INSTRUCTIONS = """
你是一个强大的AI助手。当被问到一个问题时,你需要根据提供给你的文件中的信息来回答这个问题。如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
-------------------------------------------------------------------------
1. 你需要用我提问的语言来回答。
2. 如果要求你输出图表,那么图的解析度dpi需要设定为600。图尽量使用seaborn库。
3. 图表上如果有非英文的文字,那么你需要将字体翻译为英文,然后显示。
4. 你回答的文字内容必须尽可能的详细且通俗易懂。
5. 回答时尽可能地展示分析所对应的图表,并提供分析结果。 你需要按如下格式提供内容:
5.1 提供详细且专业的分析结果,提供足够的分析依据。
5.2 给出可能造成这一结果的可能原因有哪些?
以上内容全部用1, 2, 3这样的序列号格式来表达。
"""

# Model used for the assistant.
_ASSISTANT_MODEL = "gpt-3.5-turbo-1106"


def _wait_on_run(client, run, thread, poll_interval=0.5):
    """Poll the run until it leaves the queued/in_progress states; return it."""
    while run.status in ("queued", "in_progress"):
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        time.sleep(poll_interval)
    return run


def _find_image_parts(messages):
    """Return (message_index, content_index) for each part with an image file."""
    # Parts without an `image_file` attribute are skipped via getattr, rather
    # than probing with a bare try/except.
    return [
        (i, j)
        for i, message in enumerate(messages.data)
        for j, part in enumerate(message.content)
        if getattr(part, "image_file", None)
    ]


def _download_images(client, messages, positions):
    """Fetch the file content for each image position; failed items are skipped."""
    image_files = []
    for x, y in positions:
        print('x,y=', x, y)
        file_id = messages.data[x].content[y].image_file.file_id
        try:
            image_files.append(client.files.content(file_id=file_id))
        except Exception as e:
            # Deliberately best-effort: one failed download must not abort
            # the whole response, so report it and carry on.
            print(f"An error occurred: {e}")
    return image_files


def _collect_text_parts(messages):
    """Return the text value of every text content part, in listing order."""
    texts = []
    for x, message in enumerate(messages.data):
        for y, part in enumerate(message.content):
            text = getattr(part, "text", None)
            if text:
                print('x, y=', x, y)
                texts.append(text.value)
    return texts


def openai_assistant(prompt=None, filepath=None, username=None):
    """Run ``prompt`` against an uploaded file with a code_interpreter assistant.

    Uploads the file, creates an assistant and a thread holding the prompt,
    starts a run and polls until it leaves the queued/in_progress states.
    It then gathers the thread's image files and text parts, and asks
    ``chatgpt.chatgpt`` to condense the collected text into a conclusion.

    Args:
        prompt: User question, sent as the content of the thread's message.
        filepath: Path of the file to upload; opened in binary mode.
        username: Not used by this function.

    Returns:
        A 5-tuple ``(messages, text_response, image_response, image_files,
        final_answer)``:
        - ``messages``: the thread's message listing as returned by the API.
        - ``text_response``: concatenation of the selected text parts.
        - ``image_response``: always an empty list in this implementation.
        - ``image_files``: downloaded content objects, one per image found.
        - ``final_answer``: the value returned by ``chatgpt.chatgpt``.

    Side effects:
        Prints progress output and writes ``message.json`` via ``save_json``.
    """
    client = Client()

    # Close the upload handle as soon as the upload request has returned.
    with open(filepath, 'rb') as upload_handle:
        uploaded = client.files.create(
            file=upload_handle,
            purpose='assistants',
        )

    assistant = client.beta.assistants.create(
        name="AI Expert",
        instructions=_ASSISTANT_INSTRUCTIONS,
        tools=[{"type": "code_interpreter"}],
        model=_ASSISTANT_MODEL,
        file_ids=[uploaded.id],
    )

    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
                "file_ids": [uploaded.id],
            }
        ],
    )
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    run = client.beta.threads.runs.retrieve(
        run_id=run.id,
        thread_id=thread.id,
        timeout=100,
    )
    run = _wait_on_run(client, run, thread)

    messages = client.beta.threads.messages.list(thread_id=thread.id)
    show_json(name='messages:', obj=messages)
    print('--' * 40)
    save_json(obj=messages)

    # Never populated; kept so the return tuple keeps its shape for callers.
    image_response = []

    imagefile_position = _find_image_parts(messages)
    print('--' * 30)
    print("总共有几张图片?:", len(imagefile_position))
    print('--' * 30)

    print('start the image and text repsonse process!')
    image_files = _download_images(client, messages, imagefile_position)

    # Every message and content part is scanned, not just a fixed 5x5 grid.
    my_msg = _collect_text_parts(messages)

    # NOTE(review): this drops the first collected text and orders the rest in
    # reverse *lexicographic* order, not chronological order. It is kept as-is
    # because the intended order cannot be confirmed from this file; verify
    # against the listing order returned by the API.
    final_msg = sorted(my_msg[1:], reverse=True)
    text_response = "".join(final_msg)
    print('final_msg:', final_msg)
    print('总共有几个text response:', len(my_msg))

    user_prompt = f"""首先,我会向你提供一段【文字内容】,这段文字中可能包括了一系列的多轮对话的内容。接着,我需要你根据这段文字中的内容整理成一段文字结论。你的回答风格需要很专业,包括:尽可能的包含统计数据、数字和专业的结论,不能有口语化的表达。【文字内容】如下{text_response}。"""
    final_answer = chatgpt.chatgpt(user_prompt=user_prompt)

    return messages, text_response, image_response, image_files, final_answer
| |
|
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |