File size: 3,577 Bytes
29468f4
 
 
 
 
 
69394d2
29468f4
 
 
 
 
 
 
18e5ea6
29468f4
40279cd
69394d2
 
29468f4
69394d2
 
29468f4
40279cd
 
18e5ea6
29468f4
 
 
 
 
bc81999
 
 
29468f4
 
 
 
 
18e5ea6
c182983
29468f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18e5ea6
 
 
 
 
 
 
 
 
 
 
 
 
 
29468f4
 
18e5ea6
 
 
 
 
 
29468f4
bc81999
18e5ea6
 
 
 
 
 
29468f4
18e5ea6
 
29468f4
18e5ea6
29468f4
18e5ea6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import torch
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from transformers import BlipProcessor,BlipForConditionalGeneration
from transformers.models.oneformer.modeling_oneformer import OneFormerModelOutput
import requests
from PIL import Image
from langchain.tools import BaseTool
import gradio as gr
from langchain import PromptTemplate, FewShotPromptTemplate, LLMChain

# OpenAI credentials are read from the environment; the Azure variant below is
# kept as a commented-out alternative configuration.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# OPENAI_API_BASE = os.getenv("OPENAI_AI_BASE")
# DEP_NAME = os.getenv("deployment name")

# Chat model used both by the agent and by the translation chain.
# temperature=0 keeps answers deterministic.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo')
# llm = AzureChatOpenAI(deployment_name=DEP_NAME,openai_api_base=OPENAI_API_BASE,openai_api_key=OPENAI_API_KEY,openai_api_version="2023-03-15-preview",model_name="gpt-3.5-turbo")

# Hugging Face model id for BLIP image captioning; the commented path points to
# a local copy used during development.
image_to_text_model = "Salesforce/blip-image-captioning-large"
# image_to_text_model = "F:\\code\\Anaconda\\blip-image-captioning-large"

# Run the captioning model on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Loaded once at import time and shared by describeImage(); downloading the
# weights happens here on first run.
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)

def describeImage(image):
  """Generate an English caption for an image using the module-level BLIP model.

  Args:
    image: path to an image file (Gradio's ``gr.Image(type='filepath')``
      supplies a filepath string; any source ``PIL.Image.open`` accepts works).

  Returns:
    The decoded caption string with special tokens stripped.
  """
  # Image.open is lazy and keeps the underlying file handle open; use a
  # context manager so the handle is released. convert('RGB') produces an
  # independent in-memory copy, so it is safe to use after the file closes.
  with Image.open(image) as raw:
    image_object = raw.convert('RGB')
  inputs = processor(image_object, return_tensors="pt").to(device)
  outputs = model.generate(**inputs)
  return processor.decode(outputs[0], skip_special_tokens=True)

#img_url = 'https://img1.baidu.com/it/u=1919509102,1927615551&fm=253&fmt=auto&app=120&f=JPEG?w=889&h=500'
# description = describeImage(img_url)
# print(description)


class DescribeImageTool(BaseTool):
  """LangChain tool wrapping the local BLIP captioning pipeline.

  The agent invokes this tool with an image path/URL and receives an
  English-language description back.
  """

  name = "Describe Image Tool"
  description = 'use this tool to describe an image.'

  def _run(self, url: str):
    # Delegate straight to the module-level captioning helper.
    return describeImage(url)

  def _arun(self, query: str):
    # No async implementation is provided for this tool.
    raise NotImplementedError("Async operation not supported yet")


# The agent's toolbox: only the image-description tool is exposed.
tools = [DescribeImageTool()]


# Conversational ReAct agent that can decide to call the image tool.
# max_iterations caps the think/act loop; 'generate' asks the LLM to produce a
# final answer when the cap is hit instead of aborting. The window memory keeps
# the last k=5 exchanges as message objects (required by chat-style agents).
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)

# Translation helper
def to_chinese(title):
    """Translate *title* into Chinese via a one-shot LLM chain.

    Args:
      title: English text to translate (typically the agent's answer).

    Returns:
      The LLM's Chinese translation as a string.
    """
    # Prompt text (runtime string, kept verbatim) asks the model to translate
    # the following sentence into Chinese.
    prompt = PromptTemplate(
        template="翻译下面语句到中文\n{en}",
        input_variables=["en"],
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    return chain.run(title)



def descImage(input_text, image_url):
    """Answer *input_text* about the image at *image_url*, in Chinese.

    Args:
      input_text: the user's question (e.g. "describe this picture").
      image_url: path or URL of the image, as provided by the Gradio widget.

    Returns:
      The agent's answer translated into Chinese.
    """
    # Hand the agent a single message combining the question and the image
    # reference; it decides whether to invoke the describe-image tool.
    result = agent(f"{input_text}:\n{image_url}")
    print(result)
    # Translate the English answer for the Chinese-language UI.
    return to_chinese(result['output'])

# ret = descImage("描述该图片","https://lmg.jj20.com/up/allimg/4k/s/02/2109250006343S5-0-lp.jpg")
# print(ret)


# Minimal Gradio UI: an image upload (passed to descImage as a filepath), a
# question box, a submit button, and a text area for the Chinese answer.
with gr.Blocks() as demo:
    with gr.Column():
        # type='filepath' makes Gradio hand descImage a path string, which is
        # what describeImage ultimately opens with PIL.
        file = gr.Image(type='filepath')
        user_input = gr.Textbox(show_label=False,placeholder="请输入问题",lines=1)
    with gr.Column():
        submitBtn = gr.Button("提交",variant="primary")

    with gr.Column():
        output = gr.TextArea(show_label=False,placeholder="输出结果",lines=5)

    # Wire the button: (question, image path) -> descImage -> output area.
    submitBtn.click(descImage,[user_input,file],output,show_progress=True)

# Blocks on the local server until interrupted.
demo.launch()