# (Hugging Face Spaces page header removed: "Spaces: Paused" — not part of the program)
| import random | |
| import traceback | |
| import gradio as gr | |
| import numpy as np | |
| import os | |
| from langchain_core.output_parsers import JsonOutputParser | |
| from langchain_openai.chat_models import ChatOpenAI | |
| from langchain.schema import HumanMessage, SystemMessage, AIMessage | |
| from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages | |
| from langchain_groq import ChatGroq | |
| import openai | |
| from langchain import hub | |
| from langchain_chroma import Chroma | |
| from langchain_community.document_loaders import WebBaseLoader, CSVLoader | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_core.vectorstores import VectorStoreRetriever | |
# ---------------------------------------------------------------------------
# Demo defaults and option lists
# ---------------------------------------------------------------------------
# Example product brief pre-filled into the "Features" textbox.
feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
# Default garment type; "" / "all" are mapped to the generic word "garment" downstream.
garment_type = "all"
reference_text = ""
# Jinja-style "structure" templates: each "{{ field | tone hints }}" placeholder
# tells the model which section to write and in what style.  The optional
# leading "[type: ..., language=...]" header is parsed by parse_structure().
structure_text = \
"""# Headline {{ headline | inspiring, bold, action-oriented, max 8 words }}
## Introduction
{{ introduction_paragraph | motivational, passionate, 2-3 sentences }}
## Features and Benefits
{% for feature in features %}
### Feature {{ loop.index }}: {{ feature.name | dynamic, direct, 5-6 words }}
{{ feature.details | energetic, clear, 3-4 sentences }}
{% endfor %}
## Technical Specifications
{{ technical_specs | informative, to the point, concise list format }}
"""
structure_text_1 = """[type: UK website, style=true, language=English]
{{ introduction_paragraph | motivational, passionate, 1-2 sentences }}
{% for feature in features as bulleted list %}
{{ feature.description | dynamic, direct, 3-6 words }}
{% endfor %}
{{ technical_specs | informative, to the point, concise list format }}"""
structure_text_2 = """[type: Japanese newsletter, style=true, language=Japanese]
{{ introduction_paragraph | motivational, passionate, 3-6 sentences }}"""
# Output languages for the (currently commented-out) language dropdown.
languages = ["American English",
             "British English",
             "German",
             "French",
             "Chinese",
             "Spanish",
             "Dutch",
             "Italian",
             "Japanese",
             "Polish",
             "Portuguese"]
# Chat models selectable in the Advanced Options accordion.
models = ["gpt-4-turbo",
          "gpt-4o",
          "gpt-3.5-turbo",
          "claude-3-sonnet-20240229",
          "claude-3-opus-20240229",
          "claude-3-5-sonnet-20240620"
          #"llama3-70b-8192",
          ]
# Raises KeyError at import time if OPENAI_API_KEY is unset -- intentional fail-fast.
openai.api_key = os.environ["OPENAI_API_KEY"]
import base64
import requests
# OpenAI API Key
| # Function to encode the image | |
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
| import json | |
def get_json(text: str):
    """Parse a model response into a dict, tolerating Markdown code fences.

    The model is asked for bare JSON but sometimes wraps it in ``` / ```json
    fences.  Strip only an anchored leading/trailing fence -- blanket
    ``replace('json', '')`` would also corrupt any "json" occurring inside the
    payload itself.

    Returns the parsed dict, or an empty features/intended_use/alt_text dict
    when the model reported "No garment detected".  Raises json.JSONDecodeError
    on any other non-JSON text (callers catch broadly).
    """
    text = text.strip()
    # Remove an opening fence (``` or ```json) plus following whitespace/newline.
    text = re.sub(r'^```(?:json)?\s*', '', text)
    # Remove a closing fence at the very end, then trim.
    text = re.sub(r'```$', '', text).strip()
    if text.startswith("No garment detected"):
        return {
            "features": [],
            "intended_use": [],
            "alt_text": []
        }
    return json.loads(text)
def detect_features(image_paths, garment_type, language="English"):
    """Ask GPT-4o (vision) to describe the garment shown in the gallery images.

    image_paths: gradio Gallery value -- a list of (path, caption) pairs; only
        the path element is used.  # assumes Gallery tuple format -- TODO confirm
    garment_type: free-text garment type; "" / "all" fall back to "garment".
    language: language for the generated alt texts.

    Returns (features_dict, base64_images) where features_dict has keys
    "features", "intended_use" and "alt_text", or ("", []) on any failure
    (errors are printed, never raised).
    """
    # Path to your image
    # image_path = "path_to_your_image.jpg"
    # Getting the base64 string
    try:
        base64_images = [encode_image(image_path[0]) for image_path in image_paths]
        if garment_type == "" or garment_type == "all":
            garment_type = "garment"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai.api_key}"
        }
        # Raw REST payload (not langchain): one text prompt followed by one
        # image_url part per uploaded photo.
        payload = {
            "model": "gpt-4o",
            # "model": "gpt-4-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"""Describe the features of the {garment_type} in the photos in less than 100 words.
What is the intended use of the {garment_type} in this image, use at most 5 words for intended use?
Generate alt text for each of the images.
Make sure to output the alt text in {language} language.
If the photo does not contain a garment, return 'No garment detected'.
If the photo contains a garment, return the result in in the following JSON format without any preceding or trailing text:
{{
"features": [list of comma separated features],
"intended_use": [list of comma separated intended uses],
"alt_text": [list of alt text for image 1, alt text for image 2]
}}"""
                        },
                    ] + [{
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    } for base64_image in base64_images]
                }
            ],
            "temperature": 0.0,
            "max_tokens": 300
        }
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        print(response)
        response = response.json()
        print("image features", response["choices"][0]['message']['content'])
        # get_json tolerates Markdown code fences around the returned JSON.
        jresponse = get_json(response["choices"][0]['message']['content'])
        return jresponse, base64_images
    except Exception as e:
        # Best-effort: missing images, network errors or bad JSON all yield ("", []).
        print(e.__class__, e)
        traceback.print_exc()
        return "", []
| import re | |
def parse_structure(struct_ref):
    """Extract the declared type and language from each structure textbox.

    ``struct_ref`` alternates (structure, reference-copy) strings; only the
    even-indexed structure entries are scanned for a
    ``[type: ..., language=...]``-style header.

    Returns ``(types, languages)``, one entry per pair, with "_" as the
    placeholder when nothing is declared.
    """
    n_pairs = (len(struct_ref) + 1) // 2  # tolerate a trailing unpaired entry
    languages = ["_"] * n_pairs
    types = ["_"] * n_pairs
    for si in range(0, len(struct_ref), 2):
        # Split on anything that is not a letter/space/newline, e.g.
        # "[type: UK website, language=English]" ->
        # ["type", " UK website", ..., " language", "English"]
        parts = re.findall(r'[a-zA-Z\n ]+', struct_ref[si])
        for idx, part in enumerate(parts):
            if idx + 1 >= len(parts):
                break  # keyword with no following value -- nothing to read
            if "language" in part.lower():
                languages[si // 2] = parts[idx + 1].strip()
            if "type" in part.lower():
                types[si // 2] = parts[idx + 1].strip()
    return types, languages
def detect_language(texts, model):
    """Ask the chat *model* which language each non-empty text is written in.

    Returns a list aligned with *texts*; "_" marks empty entries, and the
    whole result stays at "_" if the model call fails (errors are logged,
    never raised).
    """
    langs = ["_"] * len(texts)
    try:
        messages = []
        lang_map = {}  # input index -> position in the batched prompt list
        for i, text in enumerate(texts):
            if not text.strip():
                continue  # nothing to detect for blank entries
            lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                              f"\n ```{text}```")]
            print(f"{lang_mess=}")
            lang_map[i] = len(messages)
            messages.append(lang_mess)
        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        for text_idx, batch_idx in lang_map.items():
            langs[text_idx] = detected_langs[batch_idx].content
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
    return langs
def get_language(struct_lang, copy_lang):
    """Pick the output language: structure header wins, then detected copy language, then English."""
    for candidate in (struct_lang, copy_lang):
        if candidate != "_":
            return candidate
    return "English"
def get_model(model_name):
    """Instantiate the chat backend matching *model_name*.

    "gpt*" -> OpenAI, "claude*" -> Anthropic, anything else -> Groq.
    Anthropic/Groq read their API keys from the environment (KeyError if unset).
    """
    if model_name.startswith("gpt"):
        return ChatOpenAI(model=model_name, max_tokens=8192)
    if model_name.startswith("claude"):
        return ChatAnthropic(model_name=model_name,
                             anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
                             max_tokens_to_sample=4096)
    return ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"])
def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
    """Load a CSV glossary and index it in an in-memory Chroma vector store.

    glossary_file: path to a comma-separated, double-quoted CSV file.
    fieldnames: currently unused -- the CSVLoader argument that would consume
        it is commented out below.

    Returns a retriever over the embedded rows.  Embedding uses OpenAI, so
    OPENAI_API_KEY must be set and this makes network calls.
    """
    loader = CSVLoader(file_path=glossary_file,
                       csv_args={"delimiter": ",",
                                 "quotechar": '"'})
    # "fieldnames": fieldnames})
    docs = loader.load()
    vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
    retriever = vectorstore.as_retriever()
    return retriever
def glossary_rewrite(glossary: VectorStoreRetriever, text: str):
    """Look up glossary terms relevant to *text* and build a rewrite prompt.

    NOTE(review): work in progress -- the messages are constructed and printed
    but never sent to a model, and nothing is returned; the only call site
    (in generate()) is commented out.
    """
    try:
        terms = glossary.invoke(input=text)
        print("\n".join([d.page_content for d in terms]))
        # One "term. definition" line per retrieved glossary row.
        glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
        terms = []
    if len(terms) > 0:
        messages = [
            SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in English language."""),
            HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
Preserve the original text as much as possible.
Replace the terms in original text that match the definition with the corresponding terms in the glossary.
Terms, Definitions
{glossary_str}
"""),
        ]
        print(f"HumanMessage={messages[1].content}")
def generate(*data):
    """Gradio submit handler: generate product descriptions for every visible
    structure/reference pair and return (markdown, json) for the output panes.

    ``data`` is the flat gradio input list: the first ``nargs`` entries are the
    fixed controls, the remainder alternates (structure, reference copy)
    textbox values.  For each visible pair a prompt is built (structure+copy,
    copy-only, or structure-only), all prompts are batched through the selected
    chat model, and the model's self-rated best version is kept per pair.

    Reads the module-level ``visible`` counter maintained by the add/remove
    output buttons.
    """
    global visible
    print("visible", visible)
    nargs = 9
    feature, image, garment_type, model, temperature, excluded_words, included_words, glossary_upload, debug = data[:nargs]
    struct_ref = data[nargs:]
    print(f"{feature=}")
    print(f"{image=}")
    print(f"{garment_type=}")
    print(f"{model=}")
    print(f"{temperature=}")
    print(f"{excluded_words=}")
    print(f"{included_words=}")
    print(f"{debug=}")
    print(f"{glossary_upload=}")
    glossary = None
    if glossary_upload is not None:
        glossary = build_glossary(glossary_upload)
    chat = get_model(model)
    # Language precedence per pair: explicit [language=...] header, else the
    # language detected from the reference copy, else English.
    types, struct_languages = parse_structure(struct_ref)
    copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(visible + 1)], model=chat)
    languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang) for struct_lang, copy_lang in zip(struct_languages, copy_languages)]
    image_features, base64_images = detect_features(image, garment_type)
    detected_features = ""
    intended_use = ""
    alt_texts = []
    # detect_features returns "" on failure, so the len() check covers both cases.
    if image_features is not None and len(image_features) > 0:
        alt_texts = image_features["alt_text"]
        detected_features = ", ".join(image_features["features"])
        intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
    print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
    if glossary:
        print("Getting terms")
        terms = glossary.invoke(input=feature + detected_features)
        for term in terms:
            print(term)
    batch = []
    for i in range(visible + 1):
        structure = struct_ref[2 * i]
        copy = struct_ref[2 * i + 1]
        if len((structure + copy).strip()) > 0:
            if len(copy.strip()) > 0 and len(structure.strip()) > 0:
                print('------------')
                print("Using both copy and structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
            elif len(copy.strip()) > 0:
                print('------------')
                print("Using copy")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the structure of the reference copy.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            elif len(structure.strip()) > 0:
                print('------------')
                print("Using structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            batch.append(messages)
    response = chat.batch(batch, temperature=temperature)
    print(response)
    parser = JsonOutputParser()
    jresponse = [parser.parse(msg.content) for msg in response]
    descriptions = []
    for jr in jresponse:
        print(f'{jr=}')
        bestid = jr["best_version"]["id"]
        # Default to an empty entry so a dangling best_version id from the
        # model cannot raise NameError below.
        bestd = ""
        for d in jr["versions"]:
            if d["id"] == bestid:
                bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
                break
        descriptions.append(bestd)
    md_content = "\n\n---\n\n".join(descriptions)
    alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
    alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
    result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
    # NOTE(review): the f'' below looks like an image-markdown literal whose
    # contents were lost in formatting -- both branches currently contribute
    # empty strings, so the join is a no-op.  Confirm against the original.
    result_md = md_content + alt_texts_str + '\n'.join([f'' if base64_image != "" else "" for (base64_image, alt_text) in zip(base64_images, alt_texts)])
    return result_md, result_json
# Index of the last visible structure/reference textbox pair (0-based),
# mutated by the Add/Remove Output buttons and read by generate().
visible = 0
def add_output_click(*struct_ref):
    """Reveal one more structure/reference textbox pair (up to the 10 that exist).

    Receives and returns the flat interleaved list of all 20 textboxes so
    gradio can re-render their visibility.
    """
    global visible
    print("Adding output ", visible)
    # Cap at the last pair: letting `visible` grow past 9 would make
    # generate() index struct_ref[2 * i] out of range.
    visible = min(visible + 1, 9)
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = [gr.Textbox(label=f"Structure {i}", lines=10, value=structure_texts[i], interactive=True, visible=i <= visible) for i in range(10)]
    references = [gr.Textbox(label=f"Reference copy {i}", lines=3, value=reference_texts[i], interactive=True, visible=i <= visible) for i in range(10)]
    struct_ref = [val for pair in zip(structures, references) for val in pair]
    return struct_ref
def remove_output_click(*struct_ref):
    """Hide the last visible structure/reference textbox pair and clear its text.

    No-op when only the first pair is shown.  Receives and returns the flat
    interleaved list of all 20 textboxes.
    """
    global visible
    print("Removing output", visible)
    if visible == 0:
        return struct_ref
    visible -= 1
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = []
    references = []
    for i in range(10):
        shown = i <= visible
        structures.append(gr.Textbox(label=f"Structure {i}", lines=10,
                                     value=structure_texts[i] if shown else "",
                                     interactive=True, visible=shown))
        references.append(gr.Textbox(label=f"Reference copy {i}", lines=3,
                                     value=reference_texts[i] if shown else "",
                                     interactive=True, visible=shown))
    interleaved = [widget for pair in zip(structures, references) for widget in pair]
    return interleaved
def show_advanced(model, temperature):
    """Return freshly visible model/temperature widgets (the passed-in values are ignored).

    Only referenced by a commented-out click handler below.
    """
    model_dropdown = gr.Dropdown(models, value="gpt-4-turbo", interactive=True, label="Model", visible=True)
    temperature_slider = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
    return model_dropdown, temperature_slider
with gr.Blocks() as demo:
    # `visible` is the index of the last shown structure/reference pair; the
    # Add/Remove Output buttons mutate it as a module-level global.
    visible = 0
    print("Building interface")
    with gr.Row():
        with gr.Column():
            # Left column: product inputs and generation options.
            feature = gr.Textbox(label="Features", value=feature_text, lines=3, interactive=True)
            image = gr.Gallery(label="Images")
            garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
            # language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
            with gr.Accordion(label="Advanced Options", open=False):
                model = gr.Dropdown(models, value="claude-3-5-sonnet-20240620", interactive=True, label="Model", visible=True)
                temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
                excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
                # glossary = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2,"static"), headers=["Description", "Way of writing"], label="Glossary", interactive=True)
                glossary_upload = gr.UploadButton(label="Upload Glossary", interactive=True, file_types=["csv"])
                debug = gr.Checkbox(label="Debug", interactive=True, value=True)
            with gr.Row():
                submit = gr.Button(value="Submit")
                # advanced = gr.Button(value="Advanced")
        with gr.Column():
            # Middle column: up to 10 (structure, reference copy) textbox pairs,
            # flattened into one interleaved list so they can be wired as *args.
            visible = 0
            struct_ref = [val for i in range(10) for val in
                          [gr.Textbox(label=f"Structure {i}", lines=10, value="", interactive=True, visible=i <= visible),
                           gr.Textbox(label=f"Reference copy {i}", lines=3, value="", interactive=True, visible=i <= visible)]]
            # Pre-fill the first structure with the UK-website example template.
            struct_ref[0].value = structure_text_1
            # struct_ref[2].value = structure_text_2
            with gr.Row():
                add_output = gr.Button(value="Add Output")
                remove_output = gr.Button(value="Remove Output")
            add_output.click(add_output_click, inputs=struct_ref, outputs=struct_ref)
            remove_output.click(remove_output_click, inputs=struct_ref, outputs=struct_ref)
        with gr.Column():
            # Right column: rendered markdown plus the raw JSON payload.
            md_output = gr.Markdown(label="Output", show_label=True)
            json_output = gr.JSON(label="JSON Output")
    submit.click(generate, inputs=[feature, image, garment_type, model, temperature,
                                   excluded_words, included_words, glossary_upload, debug, *struct_ref],
                 outputs=[md_output, json_output])
    # advanced.click(show_advanced, inputs=[], outputs=[model, temperature])
| import bcrypt | |
def authf(username, password):
    """Gradio auth callback: check a username/password against passwords.txt.

    passwords.txt holds one "username bcrypt-hash" pair per line.  Returns
    True on the first matching line, False on no match or any error (missing
    file, unreadable hash, ...).
    """
    try:
        with open("passwords.txt", "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 2:
                    # Skip blank/malformed lines instead of letting the unpack
                    # raise and abort the scan before later valid entries.
                    continue
                u, p = parts
                if u == username and bcrypt.checkpw(password.encode('utf-8'), p.encode('utf-8')):
                    return True
    except Exception as e:
        print("Error reading password", e)
        traceback.print_exc()
    return False
if __name__ == '__main__':
    # demo.launch(server_name="0.0.0.0", auth=authf)
    # Launches without authentication; swap to the line above to require
    # the bcrypt login backed by passwords.txt.
    demo.launch()