import gradio as gr
from PIL import Image
import requests
import base64
import os
import openai

# Azure OpenAI connection settings (legacy openai<1.0 module-level config).
openai.api_type = "azure"
openai.api_base = "https://kidgpt.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
# SECURITY: an API key was hard-coded here. Prefer the environment variable;
# the literal is kept only as a backward-compatible fallback and MUST be
# revoked/rotated since it has been committed to source control.
openai.api_key = os.getenv("OPENAI_API_KEY", "c10faac20a3f4008b63a573ab3afc30b")


def get_completion(text, model="Demo-gpt-35-turbo"):
    """Send OCR'd text to an Azure OpenAI chat deployment and return the reply.

    Args:
        text: Raw OCR text scraped from the uploaded image(s).
        model: Azure deployment (engine) name.

    Returns:
        The assistant message content (str) produced by the model.
    """
    # The prompt asks the model to profile a content creator from OCR'd
    # material and emit sections delimited by #background#/#audience#/etc.
    # (Runtime string — kept verbatim, do not translate.)
    prompt = f"""
对以下```符号内是通过图片ocr识别的资料,请帮我完成以下任务:
1. 分析博主背景信息
2. 分析这位博主潜在受众
3. 帮这位博主设计5个未来值得做内容标题
4. 如果作为粉丝想与这位博主互动,给出5个博主会关心很可能会回答的问题(用平等的语气)
输出格式如下:
#background#
<分析博主背景信息>
#audience#
<分析博主潜在受众>
#topic#
<设计5个未来值得做的内容标题>
#questions#
<设计5个能与博主关心的提问>
```{text}```
\n\n\n
"""
    messages = [
        {"role": "system",
         "content": "You are an AI assistant that helps people find information."},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        engine=model,
        messages=messages,
        temperature=0,      # deterministic output
        max_tokens=1500,
        stop="\n\n\n",      # 避免重复 (avoid repetition)
    )
    return response.choices[0].message["content"]


def _ocr_image_text(ocr, image_path):
    """Run PaddleOCR on one image and join the recognized lines with '\\n'.

    Each entry of ``ocr.ocr(...)`` is a page; each line within a page is
    ``[box, (text, confidence)]`` — we keep only the text.
    """
    result = ocr.ocr(image_path, cls=True)
    lines = []
    for page in result:
        for line in page:
            lines.append(line[1][0])
    return "".join(txt + "\n" for txt in lines)


def recognize_text(input_image1, input_image2):
    """OCR up to two uploaded images and run the ChatGPT analysis on the text.

    Args:
        input_image1: Filepath of the first (required) image, or None.
        input_image2: Filepath of the optional second image, or None.

    Returns:
        (recognized_text, model_response) tuple of strings.
    """
    # FIX: PaddleOCR was used but never imported anywhere in this file,
    # which raised NameError at runtime. Imported locally so the Gradio UI
    # can still load even if paddleocr is not installed.
    from paddleocr import PaddleOCR

    # NOTE(review): the model is re-downloaded/loaded on every request;
    # consider caching this instance at module level.
    ocr = PaddleOCR(use_angle_cls=True, lang='ch')

    if input_image1 is None:
        # Runtime strings kept verbatim ("please upload an image").
        return "请上传图片", "请上传图片"

    text1 = _ocr_image_text(ocr, input_image1)
    print(text1)

    text2 = ""
    if input_image2 is not None:
        # BUG FIX: the original OCR'd input_image1 a second time here,
        # so the second upload was silently ignored (and text1 duplicated).
        text2 = _ocr_image_text(ocr, input_image2)
        print(text2)

    text = text1 + text2
    response = get_completion(text)
    print(response)
    return text, response
# --- Gradio UI wiring (runs at import time) ---
# Create the input and output components.
# NOTE(review): `input_images` is created but never used — dead code candidate.
input_images = gr.inputs.File(file_count="multiple")
input_image1 = gr.inputs.Image(type="filepath", label="上传图片1")
# NOTE(review): debug leftover — consider removing.
print("type:", type(input_image1))
input_image2 = gr.inputs.Image(type="filepath", label="上传图片2")
output_text1 = gr.outputs.Textbox(label="识别的文字")
output_text2 = gr.outputs.Textbox(label="GhatGPT文字")
# Create the Gradio interface: two optional image uploads in,
# OCR text + model analysis out. `gr.inputs`/`gr.outputs` is the
# deprecated pre-3.x component namespace — verify against the pinned
# Gradio version before upgrading.
iface = gr.Interface(fn=recognize_text,
                     inputs=[input_image1, input_image2],
                     outputs=[output_text1, output_text2],
                     title="图片文字识别",
                     allow_flagging=False)
# share=True exposes a public tunnel URL; debug=True blocks and streams logs.
iface.launch(share=True, debug = True)