"""Gradio app that generates e-commerce product descriptions with LLMs.

Combines manually entered product features with features detected from
product photos (OpenAI vision endpoint), optional per-output reference
structures / reference copies, and an optional CSV glossary (Chroma RAG),
then renders the best-scored description per output slot.
"""

import base64
import json
import os
import random
import re
import traceback

import bcrypt
import gradio as gr
import numpy as np
import openai
import requests
from langchain import hub
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, CSVLoader
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

# --- UI default values -------------------------------------------------------

feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
garment_type = "all"
reference_text = ""

# Jinja-style structure template (currently unused default; kept for reference).
structure_text = \
"""# Headline
{{ headline | inspiring, bold, action-oriented, max 8 words }}

## Introduction
{{ introduction_paragraph | motivational, passionate, 2-3 sentences }}

## Features and Benefits
{% for feature in features %}
### Feature {{ loop.index }}: {{ feature.name | dynamic, direct, 5-6 words }}
{{ feature.details | energetic, clear, 3-4 sentences }}
{% endfor %}

## Technical Specifications
{{ technical_specs | informative, to the point, concise list format }}
"""

# Default template for the first output slot.
structure_text_1 = """[type: UK website, style=true, language=English]
{{ introduction_paragraph | motivational, passionate, 1-2 sentences }}
{% for feature in features as bulleted list %}
{{ feature.description | dynamic, direct, 3-6 words }}
{% endfor %}
{{ technical_specs | informative, to the point, concise list format }}"""

structure_text_2 = """[type: Japanese newsletter, style=true, language=Japanese]
{{ introduction_paragraph | motivational, passionate, 3-6 sentences }}"""

languages = ["American English", "British English", "German", "French", "Chinese",
             "Spanish", "Dutch", "Italian", "Japanese", "Polish", "Portuguese"]

models = ["gpt-4-turbo", "gpt-4o", "gpt-3.5-turbo",
          "claude-3-sonnet-20240229", "claude-3-opus-20240229", "claude-3-5-sonnet-20240620",
          # "llama3-70b-8192",
          ]

# Raises KeyError at import time when the key is missing (fail fast on purpose).
openai.api_key = os.environ["OPENAI_API_KEY"]


def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def get_json(text: str):
    """Parse the (possibly markdown-fenced) JSON emitted by the vision model.

    Returns an empty feature dict when the model reported that no garment
    was detected.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = text.strip()
    # BUGFIX: the previous `.replace('`', '').replace('json', '')` deleted
    # every backtick and every occurrence of the substring "json" anywhere in
    # the payload, corrupting content that legitimately contains either.
    # Only strip the surrounding markdown code-fence markers.
    text = re.sub(r'^```(?:json)?\s*', '', text)
    text = re.sub(r'\s*```$', '', text)
    if text.startswith("No garment detected"):
        return {
            "features": [],
            "intended_use": [],
            "alt_text": []
        }
    return json.loads(text)


def detect_features(image_paths, garment_type, language="English"):
    """Describe the garment in the photos via the OpenAI vision endpoint.

    Args:
        image_paths: gallery value — iterable of (path, caption) pairs; only
            the path (element 0) is used.  TODO confirm shape against caller.
        garment_type: free-text garment type; "" or "all" maps to "garment".
        language: language the alt text should be written in.

    Returns:
        (features_dict, base64_images) on success, where features_dict has
        keys "features", "intended_use" and "alt_text"; ("", []) on any error
        (callers treat the falsy first element as "no image features").
    """
    try:
        base64_images = [encode_image(image_path[0]) for image_path in image_paths]
        if garment_type == "" or garment_type == "all":
            garment_type = "garment"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai.api_key}"
        }
        payload = {
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"""Describe the features of the {garment_type} in the photos in less than 100 words.
What is the intended use of the {garment_type} in this image, use at most 5 words for intended use?
Generate alt text for each of the images. Make sure to output the alt text in {language} language.
If the photo does not contain a garment, return 'No garment detected'.
If the photo contains a garment, return the result in in the following JSON format without any preceding or trailing text:
{{
"features": [list of comma separated features],
"intended_use": [list of comma separated intended uses],
"alt_text": [list of alt text for image 1, alt text for image 2]
}}"""
                        },
                    ] + [{
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    } for base64_image in base64_images]
                }
            ],
            "temperature": 0.0,
            "max_tokens": 300
        }
        response = requests.post("https://api.openai.com/v1/chat/completions",
                                 headers=headers, json=payload)
        print(response)
        response = response.json()
        content = response["choices"][0]['message']['content']
        print("image features", content)
        return get_json(content), base64_images
    except Exception as e:
        # Best-effort: image analysis is optional, so swallow and log.
        print(e.__class__, e)
        traceback.print_exc()
        return "", []


def parse_structure(struct_ref):
    """Extract `type:` and `language=` annotations from each structure box.

    *struct_ref* alternates (structure, reference copy) textbox values; only
    the even indices (structures) may carry a `[type: ..., language=...]`
    header, e.g. "[type: UK website, style=true, language=English]".

    Returns:
        (types, languages): two lists with one entry per output slot, using
        "_" where the annotation is absent.
    """
    slot_languages = ["_"] * (len(struct_ref) // 2)
    types = ["_"] * (len(struct_ref) // 2)
    for si in range(0, len(struct_ref), 2):
        # Tokenize into alphabetic runs; the value follows its keyword token.
        parts = re.findall(r'[a-zA-Z\n ]+', struct_ref[si])
        for idx, part in enumerate(parts):
            if idx + 1 >= len(parts):
                # BUGFIX: keyword as last token previously raised IndexError.
                continue
            if "language" in part.lower():
                slot_languages[si // 2] = parts[idx + 1].strip()
            if "type" in part.lower():
                # Renamed from `type` to avoid shadowing the builtin.
                types[si // 2] = parts[idx + 1].strip()
    return types, slot_languages


def detect_language(texts, model):
    """Detect the language of each non-empty text with one batched LLM call.

    Returns a list parallel to *texts*; entries stay "_" for empty texts or
    when detection fails (errors are logged, never raised).
    """
    langs = ["_"] * len(texts)
    try:
        messages = []
        lang_map = {}  # index in `texts` -> index in `messages`
        for i, text in enumerate(texts):
            if len(text.strip()) > 0:
                lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                                  f"\n ```{text}```")]
                print(f"{lang_mess=}")
                messages.append(lang_mess)
                lang_map[i] = len(messages) - 1
        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        for k, v in lang_map.items():
            langs[k] = detected_langs[v].content
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
    return langs


def get_language(struct_lang, copy_lang):
    """Pick the output language: structure annotation wins over detected copy
    language; default is English ("_" means "not specified")."""
    if struct_lang != "_":
        return struct_lang
    if copy_lang != "_":
        return copy_lang
    return "English"


def get_model(model_name):
    """Instantiate the chat model for *model_name* (OpenAI / Anthropic / Groq).

    Raises KeyError when the provider's API-key environment variable is unset.
    """
    if model_name.startswith("gpt"):
        chat = ChatOpenAI(model=model_name, max_tokens=8192)
    elif model_name.startswith("claude"):
        chat = ChatAnthropic(model_name=model_name,
                             anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
                             max_tokens_to_sample=4096)
    else:
        chat = ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"])
    return chat


def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
    """Load a CSV glossary and index it into an in-memory Chroma retriever.

    `fieldnames` is currently unused (kept for interface compatibility).
    """
    loader = CSVLoader(file_path=glossary_file,
                       csv_args={"delimiter": ",", "quotechar": '"'})
    docs = loader.load()
    vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
    return vectorstore.as_retriever()


def glossary_rewrite(glossary: VectorStoreRetriever, text: str):
    """Prepare a glossary-based rewrite prompt for *text*.

    NOTE(review): this function is incomplete — it retrieves glossary terms
    and builds the messages but never invokes a model, so it only prints the
    prompt.  Kept as-is pending completion.
    """
    try:
        terms = glossary.invoke(input=text)
        print("\n".join([d.page_content for d in terms]))
        glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
        terms = []
    if len(terms) > 0:
        messages = [
            SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites.
Make sure to write in English language."""),
            HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
Preserve the original text as much as possible.
Replace the terms in original text that match the definition with the corresponding terms in the glossary.

Terms, Definitions
{glossary_str}
"""),
        ]
        print(f"HumanMessage={messages[1].content}")


def generate(*data):
    """Gradio submit handler: produce one description per visible output slot.

    The first 9 positional inputs are the fixed controls; the rest alternate
    (structure, reference copy) textbox values.  Returns (markdown, json).

    NOTE(review): relies on the module-level `visible` counter, which is
    shared across all browser sessions — confirm single-user deployment.
    """
    global visible
    print("visible", visible)
    nargs = 9
    feature, image, garment_type, model, temperature, excluded_words, \
        included_words, glossary_upload, debug = data[:nargs]
    struct_ref = data[nargs:]
    print(f"{feature=}")
    print(f"{image=}")
    print(f"{garment_type=}")
    print(f"{model=}")
    print(f"{temperature=}")
    print(f"{excluded_words=}")
    print(f"{included_words=}")
    print(f"{debug=}")
    print(f"{glossary_upload=}")

    glossary = None
    if glossary_upload is not None:
        glossary = build_glossary(glossary_upload)
    chat = get_model(model)

    types, struct_languages = parse_structure(struct_ref)
    copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(visible + 1)],
                                     model=chat)
    languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang)
                 for struct_lang, copy_lang in zip(struct_languages, copy_languages)]

    image_features, base64_images = detect_features(image, garment_type)
    detected_features = ""
    intended_use = ""
    alt_texts = []
    if image_features is not None and len(image_features) > 0:
        alt_texts = image_features["alt_text"]
        detected_features = ", ".join(image_features["features"])
        # Note: `intended_use` already carries its label; the prompts below
        # therefore interpolate it bare (previously the label was duplicated).
        intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
    print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")

    if glossary:
        print("Getting terms")
        terms = glossary.invoke(input=feature + detected_features)
        for term in terms:
            print(term)

    batch = []
    for i in range(visible + 1):
        structure = struct_ref[2 * i]
        copy = struct_ref[2 * i + 1]
        if len((structure + copy).strip()) > 0:
            if len(copy.strip()) > 0 and len(structure.strip()) > 0:
                print('------------')
                print("Using both copy and structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites.
Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
{intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
            elif len(copy.strip()) > 0:
                print('------------')
                print("Using copy")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites.
Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the structure of the reference copy.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
{intended_use}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            elif len(structure.strip()) > 0:
                print('------------')
                print("Using structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites.
Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
{intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            batch.append(messages)

    response = chat.batch(batch, temperature=temperature)
    print(response)
    parser = JsonOutputParser()
    jresponse = [parser.parse(msg.content) for msg in response]

    descriptions = []
    for jr in jresponse:
        print(f'{jr=}')
        versions = jr.get("versions", [])
        bestid = jr.get("best_version", {}).get("id")
        # BUGFIX: `bestd` was previously left unbound (NameError) — or
        # silently carried over from the previous slot — when no version
        # matched `bestid`.  Fall back to the first version instead.
        chosen = next((d for d in versions if d["id"] == bestid),
                      versions[0] if versions else None)
        if chosen is None:
            descriptions.append("")
            continue
        bestd = chosen["content"] + (f"\n\nDebug info:\n\nScore: {chosen['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
        descriptions.append(bestd)

    md_content = "\n\n---\n\n".join(descriptions)
    alt_texts_str = ('\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts)) if len(alt_texts) > 0 else ""
    # Gallery entries are (path, caption) pairs — key the alt text by path.
    alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
    result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
    result_md = md_content + alt_texts_str + '\n'.join(
        [f'![Product photo](data:image/png;base64,{base64_image} "{alt_text}")' if base64_image != "" else ""
         for (base64_image, alt_text) in zip(base64_images, alt_texts)])
    return result_md, result_json


# Number of extra output slots currently shown (slot 0 is always visible).
# NOTE(review): module-level state is shared across all Gradio sessions.
visible = 0


def add_output_click(*struct_ref):
    """Reveal one more (structure, reference copy) textbox pair (max 10)."""
    global visible
    print("Adding output ", visible)
    visible += 1
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = [gr.Textbox(label=f"Structure {i}", lines=10, value=structure_texts[i],
                             interactive=True, visible=i <= visible) for i in range(10)]
    references = [gr.Textbox(label=f"Reference copy {i}", lines=3, value=reference_texts[i],
                             interactive=True, visible=i <= visible) for i in range(10)]
    return [val for pair in zip(structures, references) for val in pair]


def remove_output_click(*struct_ref):
    """Hide the last visible textbox pair and clear its contents."""
    global visible
    print("Removing output", visible)
    if visible == 0:
        return struct_ref
    visible -= 1
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = [gr.Textbox(label=f"Structure {i}", lines=10,
                             value=structure_texts[i] if i <= visible else "",
                             interactive=True, visible=i <= visible) for i in range(10)]
    references = [gr.Textbox(label=f"Reference copy {i}", lines=3,
                             value=reference_texts[i] if i <= visible else "",
                             interactive=True, visible=i <= visible) for i in range(10)]
    return [val for pair in zip(structures, references) for val in pair]


def show_advanced(model, temperature):
    """Return the advanced-option components made visible (currently unused)."""
    model = gr.Dropdown(models, value="gpt-4-turbo", interactive=True,
                        label="Model", visible=True)
    temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True,
                            label="Temperature", visible=True)
    return model, temperature


with gr.Blocks() as demo:
    visible = 0
    print("Building interface")
    with gr.Row():
        with gr.Column():
            feature = gr.Textbox(label="Features", value=feature_text, lines=3, interactive=True)
            image = gr.Gallery(label="Images")
            garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
            with gr.Accordion(label="Advanced Options", open=False):
                model = gr.Dropdown(models, value="claude-3-5-sonnet-20240620",
                                    interactive=True, label="Model", visible=True)
                temperature = gr.Slider(minimum=0., maximum=1.0, value=0.,
                                        interactive=True, label="Temperature", visible=True)
                excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
                glossary_upload = gr.UploadButton(label="Upload Glossary",
                                                  interactive=True, file_types=["csv"])
                debug = gr.Checkbox(label="Debug", interactive=True, value=True)
            with gr.Row():
                submit = gr.Button(value="Submit")
        with gr.Column():
            visible = 0
            # Ten hidden (structure, reference copy) pairs; only slot 0 shown.
            struct_ref = [val for i in range(10) for val in
                          [gr.Textbox(label=f"Structure {i}", lines=10, value="",
                                      interactive=True, visible=i <= visible),
                           gr.Textbox(label=f"Reference copy {i}", lines=3, value="",
                                      interactive=True, visible=i <= visible)]]
            struct_ref[0].value = structure_text_1
            # struct_ref[2].value = structure_text_2
            with gr.Row():
                add_output = gr.Button(value="Add Output")
                remove_output = gr.Button(value="Remove Output")
            add_output.click(add_output_click, inputs=struct_ref, outputs=struct_ref)
            remove_output.click(remove_output_click, inputs=struct_ref, outputs=struct_ref)
        with gr.Column():
            md_output = gr.Markdown(label="Output", show_label=True)
            json_output = gr.JSON(label="JSON Output")
    submit.click(generate,
                 inputs=[feature, image, garment_type, model, temperature,
                         excluded_words, included_words, glossary_upload, debug,
                         *struct_ref],
                 outputs=[md_output, json_output])


def authf(username, password):
    """Gradio auth callback: check against bcrypt hashes in passwords.txt.

    Each line of passwords.txt is "<username> <bcrypt-hash>".  Any read or
    parse error is logged and treated as authentication failure.
    """
    try:
        with open("passwords.txt", "r") as f:
            for line in f:
                u, p = line.strip().split()
                if u == username and bcrypt.checkpw(password.encode('utf-8'), p.encode('utf-8')):
                    return True
    except Exception as e:
        print("Error reading password", e)
        traceback.print_exc()
    return False


if __name__ == '__main__':
    # demo.launch(server_name="0.0.0.0", auth=authf)
    demo.launch()