| | import os |
| | import torch |
| | from langchain.agents import load_tools |
| | from langchain.agents import initialize_agent |
| | from langchain.agents import AgentType |
| | from langchain.llms import OpenAI |
| | from langchain.chat_models import ChatOpenAI |
| | from langchain.chains.conversation.memory import ConversationBufferWindowMemory |
| | from transformers import BlipProcessor,BlipForConditionalGeneration |
| | from transformers.models.oneformer.modeling_oneformer import OneFormerModelOutput |
| | import requests |
| | from PIL import Image |
| | from langchain.tools import BaseTool |
| | import gradio as gr |
| | from langchain import PromptTemplate, FewShotPromptTemplate, LLMChain |
| |
|
# --- Module-level configuration and model setup ---

# OpenAI credentials come from the environment; never hard-code keys.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Deterministic chat model (temperature 0), shared by the agent and the
# translation chain defined below.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# BLIP image-captioning checkpoint used for image -> text.
image_to_text_model = "Salesforce/blip-image-captioning-large"

# Prefer the GPU whenever torch can see one.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# The processor handles image preprocessing; the model itself is moved to `device`.
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
| |
|
def describeImage(image):
    """Generate an English caption for an image using the BLIP model.

    Parameters
    ----------
    image : str or file-like
        A path (or file object) accepted by ``PIL.Image.open``.

    Returns
    -------
    str
        The decoded caption, with special tokens stripped.
    """
    # Image.open is lazy and keeps the underlying file handle open; use a
    # context manager so the handle is released as soon as the pixel data
    # has been materialized by convert().
    with Image.open(image) as img:
        image_object = img.convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
| |
|
| | |
| | |
| | |
| |
|
| |
|
class DescribeImageTool(BaseTool):
    """LangChain tool exposing BLIP image captioning to the agent."""

    name = "Describe Image Tool"
    description = 'use this tool to describe an image.'

    def _run(self, url: str):
        # Delegate straight to the module-level captioning helper.
        return describeImage(url)

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
| |
|
| |
|
# Conversational agent wired with the image-description tool and a short
# sliding-window chat memory (last 5 exchanges kept as message objects).
tools = [DescribeImageTool()]

conversation_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True,
)

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversation_memory,
)
| |
|
| | |
def to_chinese(title):
    """Translate *title* into Chinese via a one-shot LLM chain.

    The template text is intentionally in Chinese ("translate the following
    sentence to Chinese"); *title* is substituted for ``{en}``.
    """
    translation_prompt = PromptTemplate(
        input_variables=["en"],
        template="翻译下面语句到中文\n{en}",
    )
    chain = LLMChain(llm=llm, prompt=translation_prompt)
    return chain.run(title)
| |
|
| |
|
| |
|
def descImage(input_text, image_url):
    """Answer *input_text* about the image at *image_url*, in Chinese.

    The agent decides whether to invoke the captioning tool; its English
    answer is then translated with ``to_chinese``.
    """
    result = agent(f"{input_text}:\n{image_url}")
    # Keep the full agent trace visible on stdout for debugging.
    print(result)
    return to_chinese(result['output'])
| |
|
| | |
| | |
| |
|
| |
|
# --- Gradio front end: image + question in, Chinese description out ---
with gr.Blocks() as demo:
    with gr.Column():
        # type='filepath' hands descImage a path on disk, which is what
        # PIL.Image.open / describeImage expect.
        image_file = gr.Image(type='filepath')
        user_input = gr.Textbox(show_label=False, placeholder="请输入问题", lines=1)
        with gr.Column():
            submit_btn = gr.Button("提交", variant="primary")

    with gr.Column():
        output = gr.TextArea(show_label=False, placeholder="输出结果", lines=5)

    submit_btn.click(descImage, [user_input, image_file], output, show_progress=True)

demo.launch()