Spaces:

Eladlev
/

simple_visual_agent

Sleeping

App Files Files Community

Eladlev committed on Sep 16, 2024

Commit

2e4600f

verified ·

1 Parent(s): 540d72d

Upload 2 files

Browse files

Files changed (2) hide show

app.py +156 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,156 @@

+from langchain.agents import create_tool_calling_agent
+from langchain.agents import AgentExecutor
+import os
+from langchain_openai import ChatOpenAI
+from langchain.agents import  Tool
+from langchain_community.utilities import GoogleSerperAPIWrapper
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.messages import HumanMessage, AIMessage
+import base64
+from PIL import Image
+import io
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+os.environ["SERPER_API_KEY"] = '23'
+os.environ['OPENAI_API_KEY'] = "skc"
+llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=os.environ['OPENAI_API_KEY'])
+search = GoogleSerperAPIWrapper()
+tools = [
+    Tool(
+        name="web_search",
+        func=search.run,
+        description="useful for when you need to extract **updated** information from the web"
+    )
+]
+# prompt = ChatPromptTemplate.from_messages([
+#     self.system_prompt,
+#     self.source_prompt,
+#     self.generate_eval_message(url)])
+agent_prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
+        ),
+        ("human", "placeholder"),
+        ("placeholder", "{chat_history}"),
+        ("human", "{input}"),
+        ("placeholder", "{agent_scratchpad}"),
+    ]
+)
+agent = create_tool_calling_agent(llm, tools, agent_prompt)
+agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+import gradio as gr
+import os
+from openai import OpenAI
+with gr.Blocks() as demo:
+    with gr.Row():
+        image = gr.Image(label="image", height=600)
+        chatbot = gr.Chatbot()
+    prompt = gr.Textbox(label="prompt")
+    serper_api = gr.Textbox(label="Serper API key")
+    openai_key = gr.Textbox(label="OpenAI API key")
+    gr.Examples(
+        examples=[
+            ["https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png",
+             "Describe what is in the image",
+             "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png"]
+        ],
+        inputs=[image, prompt],
+    )
+    def respond(message, chat_history, image):
+        # Convert NumPy array to an Image object
+        agent_input_history = []
+        for c in chat_history:
+            agent_input_history.extend([HumanMessage(content=c[0]), AIMessage(content=c[1])])
+        out = agent_executor.invoke(
+            {
+                "input": message,
+                "chat_history": agent_input_history,
+            }
+        )
+        chat_history.append((message, out['output']))
+        return "", chat_history
+    def update_serper_api(serper_api):
+        os.environ["SERPER_API_KEY"] = serper_api
+        search = GoogleSerperAPIWrapper()
+        global tools
+        tools = [
+            Tool(
+                name="Web search",
+                func=search.run,
+                description="useful for when you need to extract **updated** information from the web"
+            )
+        ]
+        agent = create_tool_calling_agent(llm, tools, agent_prompt)
+        global agent_executor
+        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+    def update_agent(openai_key):
+        os.environ['OPENAI_API_KEY'] = openai_key
+        llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=os.environ['OPENAI_API_KEY'])
+        agent = create_tool_calling_agent(llm, tools, agent_prompt)
+        global agent_executor
+        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+    def change_image(image):
+        image_pil = Image.fromarray(image)
+        # Save the image to a bytes buffer
+        buffer = io.BytesIO()
+        image_pil.save(buffer, format="PNG")  # You can also use "JPEG" if needed
+        # Get the byte data from the buffer and encode it to base64
+        image_bytes = buffer.getvalue()
+        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
+        message_content = [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,"
+                                                                                      f"{image_base64}"}}]
+        image_message = HumanMessage(content=message_content)
+        global agent_prompt
+        agent_prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
+                ),
+                image_message,
+                ("placeholder", "{chat_history}"),
+                ("human", "{input}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ]
+        )
+        agent = create_tool_calling_agent(llm, tools, agent_prompt)
+        global agent_executor
+        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+    prompt.submit(respond, [prompt, chatbot, image], [prompt, chatbot])
+    openai_key.submit(update_agent, [openai_key], [])
+    serper_api.submit(update_serper_api, [serper_api], [])
+    image.change(change_image,[image],[])
+demo.queue().launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+tqdm==4.66.1
+langchain==0.2.7
+openai==1.35.10
+tiktoken==0.7.0
+easydict==1.11
+sentence-transformers==2.2.2
+langchain-google-genai==1.0.8
+pillow==10.2.0
+langchain_openai==0.1.20
+langchain_community
+gradio