import os
import traceback

import gradio as gr
import numpy as np
import openai
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages

# SECURITY: API keys must never be hard-coded in source. The keys previously
# committed here are compromised and must be revoked/rotated immediately.
# Supply them via the environment (shell export, .env loader, secret manager).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")
if "ANTHROPIC_API_KEY" not in os.environ:
    raise RuntimeError("Set the ANTHROPIC_API_KEY environment variable before running.")
openai.api_key = os.environ["OPENAI_API_KEY"]

# chat = ChatOpenAI(model="gpt-4-turbo-preview")
# chat = ChatOpenAI(model="gpt-3.5-turbo")

# Jinja-style reference template the LLM is asked to follow when writing copy.
# The {{ ... | ... }} annotations are style hints for the model, not real filters.
structure = """
# Headline
{{ headline | bold, inspiring, action-oriented, max 8 words }}

## Introduction
{{ introduction_paragraph | motivational, passionate, 2-3 sentences }}

## Features and Benefits
{% for feature in features %}
### Feature {{ loop.index }}: {{ feature.name | dynamic, direct, 5-6 words }}
{{ feature.details | energetic, clear, 3-4 sentences }}
{% endfor %}

## Technical Specifications
{{ technical_specs | informative, to the point, concise list format }}
"""

# Default example product shown in the "Features" textbox of the UI.
features = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
reference = " "

import base64
import json


def encode_image(image_path):
    """Return the base64-encoded contents of the image file at *image_path* as a str."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def get_json(text: str):
    """Parse the vision model's reply into a dict.

    The model sometimes wraps its JSON answer in a Markdown code fence
    (```json ... ```).  Strip only that surrounding fence — deleting every
    backtick and every occurrence of the substring "json" (as the previous
    implementation did) corrupts legitimate content such as alt text that
    happens to contain those characters.

    Returns a dict with "features", "intended_use" and "alt_text" keys; all
    empty when the model reported that no garment was detected.

    Raises json.JSONDecodeError if the (de-fenced) text is not valid JSON.
    """
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # A fenced block may be tagged with the language, e.g. ```json
        if text.lstrip().lower().startswith("json"):
            text = text.lstrip()[4:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    if text.startswith("No garment detected"):
        # Non-garment photo: hand back an empty-but-well-formed result.
        return {
            "features": [],
            "intended_use": [],
            "alt_text": []
        }
    return json.loads(text)


def detect_features(image_paths):
    """Ask GPT-4 Vision to describe the garment(s) in the uploaded photos.

    *image_paths* is a Gradio gallery value — presumably a list of
    (filepath, caption) tuples, hence the item[0] indexing; confirm against
    the gr.Gallery(type="filepath") component below.

    Returns (parsed_dict, base64_images).  Image analysis is best-effort:
    on any failure the error is logged and ("", []) is returned so the
    caller can still produce a description from the text features alone.
    """
    import requests  # local import: only needed when images are analysed

    try:
        base64_images = [encode_image(item[0]) for item in image_paths]

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai.api_key}"
        }

        prompt = """Describe the features of the garment in the photos in less than 100 words. What is the intended use of the garment in this image, use at most 5 words for intended use? Generate alt text for each of the images. If the photo does not contain a garment, return 'No garment detected'. 
If the photo contains a garment, return the result in in the following JSON format without any preceding or trailing text: { "features": [list of comma separated features], "intended_use": [list of comma separated intended uses], "alt_text": [alt text for image 1, alt text for image 2] }"""

        payload = {
            "model": "gpt-4-vision-preview",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                    ] + [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                        for base64_image in base64_images
                    ]
                }
            ],
            "temperature": 0.0,
            "max_tokens": 300
        }

        response = requests.post("https://api.openai.com/v1/chat/completions",
                                 headers=headers, json=payload)
        # Fail loudly on HTTP errors instead of a confusing KeyError below.
        response.raise_for_status()
        body = response.json()
        content = body["choices"][0]['message']['content']
        print("image features", content)
        return get_json(content), base64_images
    except Exception as e:
        # Best-effort: log and fall back to text-only generation.
        print(e.__class__, e)
        traceback.print_exc()
        return "", []


def generate(features, image, structure, reference, model, temperature):
    """Produce a Markdown product description from text features plus photos.

    Combines the user-supplied feature list with features detected in the
    uploaded images, then asks the selected chat model (OpenAI or Anthropic)
    to write copy following the reference structure/tone.  Returns Markdown
    including an "Alt text" section and inline base64-embedded images.
    """
    print(f"features {features},\n image {image},\n structure{structure},\n model{model},\n temperature {temperature},\n reference {reference}")
    image_features, base64_images = detect_features(image)

    detected_features = ""
    intended_use = ""
    alt_texts = []
    # detect_features returns "" on failure; a populated dict is truthy.
    if image_features is not None and len(image_features) > 0:
        alt_texts = image_features["alt_text"]
        detected_features = ", ".join(image_features["features"])
        intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
    print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")

    # Dispatch on the model family selected in the UI dropdown.
    if model.startswith("gpt"):
        chat = ChatOpenAI(model=model)
    else:
        chat = ChatAnthropic(model_name=model,
                             anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])

    messages = [HumanMessage(content=f"""Write a product description of about 200 words for a product with the following key properties. Make sure that the description follows the structure of the reference structure below. Use creative language that is suitable for e-commerce websites. Use a consistent tone of voice throughout the text. 
If the reference text is not empty, produce the product description in the tone of voice and structure of the reference text. \n\n {features + detected_features} \n{intended_use} \nReference structure: {structure}\n Reference text: {reference}""")]
    chat_response = chat.invoke(messages, temperature=temperature)
    print(messages[0].content)
    return chat_response.content + '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) + '\n'.join(
        [f'![Product photo](data:image/png;base64,{base64_image} "{alt_text}")' if base64_image != "" else ""
         for (base64_image, alt_text) in zip(base64_images, alt_texts)])


demo = gr.Interface(
    fn=generate,
    inputs=[gr.Textbox(features, label="Features"),
            gr.Gallery(label="Product image", type="filepath"),
            gr.Textbox(structure, label="Structure"),
            gr.Textbox(reference, label="Reference copy"),
            gr.Dropdown(["gpt-4-turbo", "gpt-3.5-turbo",
                         "claude-3-sonnet-20240229", "claude-3-opus-20240229"],
                        value="gpt-3.5-turbo", label="Model"),
            gr.Slider(minimum=0., maximum=1.0, value=0.5, label="Temperature")],
    outputs=["markdown"],
)


if __name__ == "__main__":
    # Parse CLI args only when run as a script, so importing this module
    # never aborts on unrelated sys.argv contents.
    import argparse

    parser = argparse.ArgumentParser(description='Run the Gradio demo')
    parser.add_argument('--public', action='store_true',
                        help='Expose the demo to the public')
    args = parser.parse_args()
    # --public creates a shareable gradio.live URL (previously a dead flag).
    demo.launch(share=args.public)