File size: 3,443 Bytes
6fbde19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
from PIL import Image

import requests
import base64

import os
import openai
# Azure OpenAI connection settings, applied as module-level configuration of
# the `openai` package (the style used by the pre-1.0 SDK).
openai.api_type = "azure"
openai.api_base = "https://kidgpt.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
# SECURITY: the key is taken from the OPENAI_API_KEY environment variable when
# it is set. The literal below is kept only as a fallback so existing
# deployments keep working unchanged. A key committed to source control should
# be treated as compromised: rotate it and then delete this fallback.
openai.api_key = os.getenv("OPENAI_API_KEY") or "c10faac20a3f4008b63a573ab3afc30b"


def get_completion(text, model="Demo-gpt-35-turbo"):
    """Send OCR-extracted text to the chat deployment and return its analysis.

    The (Chinese) prompt asks the model to analyse the blogger described by
    the OCR text and to answer in four labelled sections: ``#background#``,
    ``#audience#``, ``#topic#`` (five future content titles) and
    ``#questions#`` (five questions a fan could ask the blogger).

    Args:
        text: Text recognised from the uploaded image(s); it is embedded
            between triple backticks inside the prompt.
        model: Name passed as ``engine`` to ``openai.ChatCompletion.create``.
            NOTE(review): the original author's remark describes the default
            as a GPT-3.5 Turbo 16k model -- confirm against the deployment.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    user_prompt = f"""
    对以下```符号内是通过图片ocr识别的资料,请帮我完成以下任务:
    1. 分析博主背景信息
    2. 分析这位博主潜在受众
    3. 帮这位博主设计5个未来值得做内容标题
    4. 如果作为粉丝想与这位博主互动,给出5个博主会关心很可能会回答的问题(用平等的语气)

    输出格式如下:
    #background#
    <分析博主背景信息>

    #audience#
    <分析博主潜在受众>

    #topic#
    <设计5个未来值得做的内容标题>

    #questions#
    <设计5个能与博主关心的提问>

    ```{text}```
    \n\n\n
    """

    system_message = {
        "role": "system",
        "content": "You are an AI assistant that helps people find information.",
    }
    user_message = {"role": "user", "content": user_prompt}

    request_options = {
        "engine": model,
        "messages": [system_message, user_message],
        # Degree of randomness of the model's output; 0 is passed here.
        "temperature": 0,
        "max_tokens": 1500,
        # Stop sequence; the original author added it to avoid repeated output.
        "stop": "\n\n\n",
    }
    completion = openai.ChatCompletion.create(**request_options)

    first_choice = completion.choices[0]
    return first_choice.message["content"]




# Lazily created PaddleOCR engine, shared by all calls so the model is
# downloaded and loaded into memory only once.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        # Imported here because the module's import block does not import
        # paddleocr; without this the name PaddleOCR is undefined.
        from paddleocr import PaddleOCR

        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang='ch')
    return _OCR_ENGINE


def _extract_text(ocr, image_path):
    """Run OCR on one image and return its text, one recognised line per row."""
    pages = ocr.ocr(image_path, cls=True)
    recognised = []
    for page in pages or []:
        # NOTE(review): PaddleOCR appears to report a page without detected
        # text as None -- skip those instead of failing on iteration.
        if not page:
            continue
        # entry[1][0] is used as the recognised string of each detected line
        # (same indexing as the original code).
        recognised.extend(entry[1][0] for entry in page)
    return "".join(f"{line}\n" for line in recognised)


def recognize_text(input_image1, input_image2):
    """OCR one or two uploaded images and ask the chat model to analyse the text.

    Args:
        input_image1: Path of the first image, or ``None`` when nothing was
            uploaded.
        input_image2: Path of an optional second image, or ``None``.

    Returns:
        A ``(text, response)`` tuple: the concatenated OCR text of both images
        and the result of ``get_completion(text)``. When ``input_image1`` is
        ``None`` both elements are the "please upload an image" placeholder.
    """
    # Guard first so a missing upload never pays for loading the OCR model.
    if input_image1 is None:
        placeholder = "请上传图片"
        return placeholder, placeholder

    ocr = _get_ocr_engine()

    # Recognise the text of the first image with PaddleOCR.
    text1 = _extract_text(ocr, input_image1)
    print(text1)

    # The second image is optional; read it from input_image2 (the original
    # code re-read input_image1 here by mistake).
    text2 = ""
    if input_image2 is not None:
        text2 = _extract_text(ocr, input_image2)
        print(text2)

    text = text1 + text2

    response = get_completion(text)
    print(response)

    return text, response



# Input and output components for the Gradio interface.
# (Alternative left by the original author, not in use:
#  input_images = gr.inputs.File(file_count="multiple"))
input_image1 = gr.inputs.Image(type="filepath", label="上传图片1")
input_image2 = gr.inputs.Image(type="filepath", label="上传图片2")

output_text1 = gr.outputs.Textbox(label="识别的文字")
# Label typo fixed: "GhatGPT" -> "ChatGPT".
output_text2 = gr.outputs.Textbox(label="ChatGPT文字")


# Build the Gradio interface around recognize_text and start serving it.
iface = gr.Interface(
    fn=recognize_text,
    inputs=[input_image1, input_image2],
    outputs=[output_text1, output_text2],
    title="图片文字识别",
    allow_flagging=False,
)
iface.launch(share=True, debug=True)

#print(pytesseract.image_to_string(image, lang='chi_sim', config='--psm 6'))