qichangyuan commited on
Commit
29468f4
·
1 Parent(s): d88d36f

新建app.py,requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +86 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from langchain.agents import load_tools
4
+ from langchain.agents import initialize_agent
5
+ from langchain.agents import AgentType
6
+ from langchain.llms import OpenAI
7
+ from langchain.chat_models import AzureChatOpenAI
8
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
9
+ from transformers import BlipProcessor,BlipForConditionalGeneration
10
+ from transformers.models.oneformer.modeling_oneformer import OneFormerModelOutput
11
+ import requests
12
+ from PIL import Image
13
+ from langchain.tools import BaseTool
14
+ import gradio as gr
15
+
16
# Read Azure OpenAI credentials from the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# BUG FIX: this previously read the misspelled env var "OPENAI_AI_BASE",
# so the base URL was always None even when OPENAI_API_BASE was set.
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
# Prefer the conventional DEPLOYMENT_NAME key; keep the original
# space-containing "deployment name" key as a backward-compatible fallback
# (a name with a space cannot be exported from a normal shell).
DEP_NAME = os.getenv("DEPLOYMENT_NAME") or os.getenv("deployment name")

# Azure-hosted gpt-3.5-turbo chat model used by the agent below.
llm = AzureChatOpenAI(
    deployment_name=DEP_NAME,
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
    openai_api_version="2023-03-15-preview",
    model_name="gpt-3.5-turbo",
)
22
+
23
# BLIP image-captioning checkpoint (large variant) pulled from the HF hub.
image_to_text_model = "Salesforce/blip-image-captioning-large"
# Run inference on GPU when available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# `processor` turns a PIL image into model-ready tensors; `model` generates
# the caption tokens. Both are module-level so `describeImage` can reuse them.
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
28
+
29
def describeImage(image_url):
    """Download the image at ``image_url`` and return a BLIP-generated caption.

    Args:
        image_url: HTTP(S) URL of the image to caption.

    Returns:
        The decoded caption string (special tokens stripped).

    Raises:
        requests.HTTPError: if the download returns a non-2xx status.
        requests.Timeout: if the server does not respond within 30 seconds.
    """
    # BUG FIX: the original call had no timeout (could hang forever) and no
    # status check (a 404 HTML error page would be handed straight to PIL).
    response = requests.get(image_url, stream=True, timeout=30)
    response.raise_for_status()
    image_object = Image.open(response.raw).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
35
+
36
# Sample image used as an import-time smoke test of the captioning pipeline.
img_url = 'https://img1.baidu.com/it/u=1919509102,1927615551&fm=253&fmt=auto&app=120&f=JPEG?w=889&h=500'
# NOTE(review): `description` is never used afterwards (the print below is
# commented out), yet this call still downloads the image and runs the model
# every time the module is imported — consider removing if unintended.
description = describeImage(img_url)
# print(description)
39
+
40
+
41
class DescribeImageTool(BaseTool):
    """LangChain tool that captions an image, given its URL, via BLIP."""

    # Metadata the agent reads when deciding whether to invoke this tool.
    name: str = "Describe Image Tool"
    description: str = 'use this tool to describe an image.'

    def _run(self, url: str) -> str:
        # Synchronous entry point: delegate to the module-level captioner.
        description = describeImage(url)
        return description

    def _arun(self, query: str):
        # Async execution is intentionally unsupported.
        raise NotImplementedError("Async operation not supported yet")
51
+
52
+
53
# The only tool the agent can call: the BLIP image captioner.
tools = [DescribeImageTool()]

# Sliding-window memory: keep the last 5 exchanges as message objects under
# the 'chat_history' key expected by the conversational agent.
conversation_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True,
)

# Conversational ReAct agent over the Azure chat model: at most 3 tool
# iterations, after which it generates a final answer instead of failing.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversation_memory,
)
69
+
70
def descImage(req, image_url):
    """Run the agent with request ``req`` applied to ``image_url``.

    Returns the agent's final answer string (its 'output' field); the full
    agent response is echoed to stdout for debugging.
    """
    result = agent(f"{req}:\n{image_url}")
    print(result)
    return result['output']
75
+
76
# Import-time demo: ask the agent (in Chinese) to describe the sample image.
# NOTE(review): this runs a full agent loop on every import of this module.
desc = descImage('描述该图片' , img_url)
# print(desc)

# Second demo call: translate the caption into Chinese and print it.
output = agent(f"将该段翻译为中文:{desc}")
print(output['output'])
81
+
82
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given name string."""
    parts = ("Hello ", name, "!!")
    return "".join(parts)
84
+
85
if __name__ == "__main__":
    # BUG FIX: `gr.Interface` takes `outputs=` (the original `output=` raised
    # TypeError); `descImage(req, image_url)` takes TWO arguments, so two text
    # inputs are needed; and the interface was built but never launched, so
    # the app never actually served.
    iface = gr.Interface(fn=descImage, inputs=["text", "text"], outputs="text")
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ transformers
4
+ torch
5
+ pillow
6
+ gradio