import os
import torch
from langchain.agents import initialize_agent, AgentType
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from transformers import BlipProcessor, BlipForConditionalGeneration
import requests
from PIL import Image
from langchain.tools import BaseTool
import gradio as gr
from langchain import PromptTemplate, LLMChain
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
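# Optional Azure OpenAI settings (uncomment together with the AzureChatOpenAI line below):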
# OPENAI_API_BASE = os.getenv("OPENAI_AI_BASE")
# DEP_NAME = os.getenv("deployment name")
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo')
# llm = AzureChatOpenAI(deployment_name=DEP_NAME,openai_api_base=OPENAI_API_BASE,openai_api_key=OPENAI_API_KEY,openai_api_version="2023-03-15-preview",model_name="gpt-3.5-turbo")
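# Load the BLIP image-captioning model; the checkpoint is fetched from the
# Hugging Face hub on first run (a local copy, commented below, also works).
# Inference runs on GPU when available, otherwise on CPU.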
image_to_text_model = "Salesforce/blip-image-captioning-large"
# image_to_text_model = "F:\\code\\Anaconda\\blip-image-captioning-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
def describeImage(image):
    """Caption a local image file with BLIP and return the generated text."""
    # image_object = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    image_object = Image.open(image).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
#img_url = 'https://img1.baidu.com/it/u=1919509102,1927615551&fm=253&fmt=auto&app=120&f=JPEG?w=889&h=500'
# description = describeImage(img_url)
# print(description)
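# Wrap the captioner as a LangChain tool; the agent selects it based on the
# `description` field when it decides a question requires describing an image.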
class DescribeImageTool(BaseTool):
    name: str = "Describe Image Tool"
    description: str = "use this tool to describe an image."

    def _run(self, url: str):
        return describeImage(url)

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
tools = [DescribeImageTool()]
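# Conversational ReAct agent: keeps the last 5 exchanges in a window memory and
# stops after 3 tool iterations, generating a best-effort answer at the cap.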
agent = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)
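# A minimal sketch of calling the agent directly (hypothetical local path):
# result = agent("Describe this image:\n./example.jpg")
# print(result['output'])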
# Translation: render the English caption into Chinese
def to_chinese(title):
    """Translate `title` into Chinese via a one-shot LLMChain."""
    # Prompt in Chinese: "Translate the following sentence into Chinese\n{en}"
    pp = "翻译下面语句到中文\n{en}"
    prompt = PromptTemplate(
        input_variables=["en"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(title)
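# e.g. to_chinese("a dog running on the beach") returns the Chinese rendering of the sentence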
def descImage(input_text, image_url):
    """Ask the agent about the image, then translate its answer into Chinese."""
    output = agent(f"{input_text}:\n{image_url}")
    print(output)
    desc = output['output']
    # Translate the description text
    desc_ch = to_chinese(desc)
    return desc_ch
# ret = descImage("描述该图片", "https://lmg.jj20.com/up/allimg/4k/s/02/2109250006343S5-0-lp.jpg")  # prompt means "describe this image"
# print(ret)
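# Gradio UI: upload an image and type a question; descImage routes both through
# the agent and shows the Chinese translation of the answer.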
with gr.Blocks() as demo:
    with gr.Column():
        file = gr.Image(type='filepath')
        user_input = gr.Textbox(show_label=False, placeholder="请输入问题", lines=1)  # "Enter your question"
    with gr.Column():
        submitBtn = gr.Button("提交", variant="primary")  # "Submit"
    with gr.Column():
        output = gr.TextArea(show_label=False, placeholder="输出结果", lines=5)  # "Output"
    submitBtn.click(descImage, [user_input, file], output, show_progress=True)

demo.launch()