Copywriting / app.py
tunght's picture
fix size and weight hallucination. Problem: cannot detect image feature with the old prompt
434b3cb
raw
history blame
30.5 kB
import random
import traceback
import gradio as gr
import numpy as np
import os
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
import openai
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, CSVLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStoreRetriever
import time
# Default product facts pre-filled into the "Features" textbox of the UI.
# The commented-out line below is an earlier sample kept for reference.
# feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
feature_text = """Brand: Puma.
Product name: Puma duffel bag.
Key features: light weight, only 1kg"""
# Module-level default garment type. This name is rebound to a Gradio Textbox
# when the interface is built further down in this file.
garment_type = "all"
# Sample reference copy used as the initial value of the first
# "Reference copy" textbox in the interface.
# NOTE(review): the leading "路" on each bullet line looks like a mis-encoded
# bullet character (e.g. "·") — confirm against the original file before
# changing it, since this text is sent to the model as-is.
reference_text = """NULLUS STUDIOS
Black Camellia Lapel Brooch Coat
Brushed wool-blend melton coat.
路 Detachable brooch at notched lapel
路 Button closure
路 Welt pocket and flap pockets
路 Padded shoulders
路 Size 30 x 20 x 60 cm
路 Weight only 9kg
路 Four-button surgeon's cuffs
路 TextPatch at cuff
路 Logo cutout at back collar
路 Welt pockets at interior
路 Full cupro satin lining
路 Logo-engraved antiqued silver-tone hardware"""
# Initial value of the first "Structure" textbox (empty: no structure by default).
structure_text = ""
# Example structure templates. They use a Jinja-like placeholder syntax
# ("{{ name | style hints }}") that is passed to the model as plain text.
# Only structure_text_2 is mentioned elsewhere in this file, and only in a
# commented-out line; the others are not referenced by the visible code.
structure_text_0 = \
"""# Headline {{ headline | inspiring, bold, action-oriented, max 8 words }}
## Introduction
{{ introduction_paragraph | motivational, passionate, 2-3 sentences }}
## Features and Benefits
{% for feature in features %}
### Feature {{ loop.index }}: {{ feature.name | dynamic, direct, 5-6 words }}
{{ feature.details | energetic, clear, 3-4 sentences }}
{% endfor %}
## Technical Specifications
{{ technical_specs | informative, to the point, concise list format }}
"""
# Template with a "[type: ..., language=...]" header line; parse_structure()
# looks for the "type" and "language" keywords in structure text.
structure_text_1 = """[type: UK website, style=true, language=English]
{{ introduction_paragraph | motivational, passionate, 1-2 sentences }}
{% for feature in features as bulleted list %}
{{ feature.description | dynamic, direct, 3-6 words }}
{% endfor %}
{{ technical_specs | informative, to the point, concise list format }}"""
structure_text_2 = """[type: Japanese newsletter, style=true, language=Japanese]
{{ introduction_paragraph | motivational, passionate, 3-6 sentences }}"""
# Candidate output languages. The only visible use is a commented-out
# dropdown in the interface; generate() builds its own local `languages` list.
languages = ["American English",
"British English",
"German",
"French",
"Chinese",
"Spanish",
"Dutch",
"Italian",
"Japanese",
"Polish",
"Portuguese"]
# Model names offered in the "Model" dropdown. get_model() picks the provider
# from the name prefix ("gpt", "claude", "gemini"; anything else goes to Groq).
models = ["gpt-4-turbo",
"gpt-4o",
"gpt-3.5-turbo",
"claude-3-sonnet-20240229",
"claude-3-opus-20240229",
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20241022",
"gemini-1.5-pro"
#"llama3-70b-8192",
]
# Read the OpenAI key at import time; a missing OPENAI_API_KEY environment
# variable raises KeyError here. The key is reused by detect_features().
openai.api_key = os.environ["OPENAI_API_KEY"]
import base64
import requests
# OpenAI API Key
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 output is decoded as
    UTF-8 so it can be embedded directly in a text payload.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
import json
def get_json(text: str):
    """Parse the JSON object returned by the vision model.

    The model sometimes wraps its answer in a markdown code fence
    (```` ```json ... ``` ````). Only that surrounding fence and its optional
    ``json`` language tag are removed; backticks or the word "json" that
    appear inside the payload are left intact.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value, or a dict with empty ``features``,
        ``intended_use`` and ``alt_text`` lists when the model answered
        "No garment detected...".

    Raises:
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    # Strip whitespace, then the fence characters at both ends only.
    cleaned = text.strip().strip('`').strip()
    # Drop the language tag that follows an opening fence ("```json").
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:].lstrip()
    if cleaned.startswith("No garment detected"):
        return {
            "features": [],
            "intended_use": [],
            "alt_text": []
        }
    return json.loads(cleaned)
def detect_features(image_paths, garment_type, language="English"):
    """Ask the OpenAI vision model to describe the garment shown in the images.

    Args:
        image_paths: Iterable of gallery items; the first element of each item
            is used as the path of the image file to send.
        garment_type: What to look for in the photos. An empty string or
            "all" is replaced by the generic word "garment".
        language: Accepted for interface compatibility; not used by the
            request.

    Returns:
        A ``(features, base64_images)`` tuple. ``features`` is the value
        parsed by ``get_json`` from the model answer and ``base64_images`` the
        encoded images in input order. On any failure, or when no images are
        given, ``("", [])`` is returned so callers can test for emptiness.
    """
    # Nothing to analyse: skip the (billed) API round trip entirely.
    if not image_paths:
        return "", []
    try:
        base64_images = [encode_image(image_path[0]) for image_path in image_paths]
        if garment_type == "" or garment_type == "all":
            garment_type = "garment"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai.api_key}"
        }
        prompt = f"""Describe the features of the {garment_type} in the photos.
Return the result in in the following JSON format without any preceding or trailing text:
{{
"features": [list of comma separated features],
"intended_use": [list of comma separated intended uses],
"alt_text": [list of alt text for image 1, alt text for image 2]
}}"""
        # NOTE(review): every image is labelled image/jpeg regardless of its
        # real format — confirm this is acceptable for PNG/WebP uploads.
        image_parts = [
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }
            for base64_image in base64_images
        ]
        payload = {
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": prompt}] + image_parts
                }
            ],
            "temperature": 0.0,
            "max_tokens": 300
        }
        # A timeout keeps a stalled connection from blocking the request forever.
        response = requests.post("https://api.openai.com/v1/chat/completions",
                                 headers=headers, json=payload, timeout=120)
        print(response)
        # Surface HTTP errors explicitly instead of failing later on a
        # missing "choices" key.
        response.raise_for_status()
        content = response.json()["choices"][0]['message']['content']
        print("image features", content)
        return get_json(content), base64_images
    except Exception as e:
        # Best effort: image analysis is optional, so report and carry on.
        print(e.__class__, e)
        traceback.print_exc()
        return "", []
import re
def parse_structure(struct_ref):
    """Extract the declared type and language from each structure text.

    ``struct_ref`` is a flat sequence of alternating structure / reference-copy
    strings; only the structure entries (even positions) are inspected. Each
    structure is split into runs of letters, spaces and newlines. Whenever a
    run contains the word "language" or "type", the run that follows it is
    taken as the value, e.g. ``[type: UK website, language=English]``. Later
    matches override earlier ones.

    Args:
        struct_ref: Flat sequence ``[structure0, copy0, structure1, ...]``.
            A trailing unpaired element is ignored.

    Returns:
        ``(types, languages)``: two lists with one entry per pair, holding
        ``"_"`` where nothing was declared.
    """
    n_slots = len(struct_ref) // 2
    languages = ["_"] * n_slots
    types = ["_"] * n_slots
    fragment_re = re.compile(r'[a-zA-Z\n ]+')
    for slot in range(n_slots):
        parts = fragment_re.findall(struct_ref[2 * slot])
        # Pair every fragment with its successor; the final fragment has no
        # value after it, so a keyword there is ignored rather than crashing.
        for part, value in zip(parts, parts[1:]):
            keyword = part.lower()
            if "language" in keyword:
                languages[slot] = value.strip()
            if "type" in keyword:
                types[slot] = value.strip()
    return types, languages
def detect_language(texts, model):
    """Ask ``model`` which language each non-blank text is written in.

    Blank texts are not sent to the model and keep the placeholder ``"_"``.
    Any exception is printed and swallowed, leaving the entries that were not
    filled in yet as ``"_"``.

    Returns a list with one entry per input text.
    """
    langs = ["_" for _ in range(len(texts))]
    try:
        messages = []
        # positions[j] is the index in `texts` that messages[j] belongs to.
        positions = []
        for position, text in enumerate(texts):
            if not text.strip():
                continue
            lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                              f"\n ```{text}```")]
            print(f"{lang_mess=}")
            messages.append(lang_mess)
            positions.append(position)
        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        for slot, position in enumerate(positions):
            langs[position] = detected_langs[slot].content
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
    return langs
def get_language(struct_lang, copy_lang):
    """Pick the output language for one slot.

    The language declared in the structure wins over the one detected from
    the reference copy; ``"_"`` means "unknown". Falls back to "English".
    """
    for candidate in (struct_lang, copy_lang):
        if candidate != "_":
            return candidate
    return "English"
def post_process(text):
    """Hook for cleaning up a generated description; currently a no-op.

    The text is returned exactly as received.
    """
    return text
def get_model(model_name, temperature=0.0):
    """Create the chat model client matching ``model_name``.

    The provider is chosen from the name prefix: "gpt" -> OpenAI,
    "claude" -> Anthropic, "gemini" -> Google; every other name is sent to
    Groq. API keys other than OpenAI's are read from the environment here.
    """
    if model_name.startswith("gpt"):
        return ChatOpenAI(model=model_name, max_tokens=4096, temperature=temperature)
    if model_name.startswith("claude"):
        return ChatAnthropic(model_name=model_name, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"], max_tokens_to_sample=4096, temperature=temperature)
    if model_name.startswith("gemini"):
        return ChatGoogleGenerativeAI(model=model_name, api_key=os.environ["GOOGLE_API_KEY"], temperature=temperature)
    # Fallback provider for any name that matched none of the prefixes above.
    return ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"], temperature=temperature)
def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
    """Load a glossary CSV into a Chroma vector store and return a retriever.

    The CSV is read with a comma delimiter and double-quote quoting, embedded
    with ``OpenAIEmbeddings`` and indexed. ``fieldnames`` is accepted but not
    forwarded to the loader.
    """
    csv_options = {"delimiter": ",",
                   "quotechar": '"'}
    documents = CSVLoader(file_path=glossary_file, csv_args=csv_options).load()
    store = Chroma.from_documents(documents=documents, embedding=OpenAIEmbeddings())
    return store.as_retriever()
def glossary_rewrite(chat: ChatOpenAI, glossary: VectorStoreRetriever, text: str):
    """Rewrite ``text`` so that it uses the terminology of the glossary.

    The retriever is queried with ``text``; the matching glossary rows are
    placed in the prompt and the chat model is asked to substitute them while
    preserving the rest of the text.

    Args:
        chat: Chat model used for the rewrite.
        glossary: Retriever over the glossary rows, or ``None`` when no
            glossary was uploaded.
        text: The description to rewrite.

    Returns:
        The rewritten text, or ``""`` when there is no glossary, no glossary
        row matched, or the rewrite failed (the error is printed).
    """
    # No glossary is a normal situation, not an error worth a traceback.
    if glossary is None:
        return ""
    try:
        terms = glossary.invoke(input=text)
        if not terms:
            return ""
        print("\n".join([d.page_content for d in terms]))
        # One glossary row per paragraph, with its fields flattened onto one line.
        glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
        messages = [
            SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in English language."""),
            HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
Preserve the original text as much as possible.
Replace the terms in original text that match the definition with the corresponding terms in the glossary.
Output only the rewritten text in markdown format.
Terms, Definitions
{glossary_str}
Text to rewrite:
{text}
"""),]
        print(f"HumanMessage={messages[1].content}")
        response = chat.invoke(messages, temperature=0.0)
        print(f"Response=\n{response.content}")
        return response.content
    except Exception as e:
        # Best effort: the caller keeps the original text when "" is returned.
        print(e.__class__, e)
        traceback.print_exc()
        return ""
# Instruction lines that differ between the three prompt variants. The key
# says which references the user filled in for an output slot.
_PROMPT_MODE_RULES = {
    "both": [
        "Make sure that the structure of each output follows the reference structure. Do not add any additional sentences or structures that are not in the reference structure.",
        "Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.",
        "Use markdown format for each output.",
        "Do not include any part of the reference structure in the output.",
        "The structure of the output should follow the reference structure.",
        "Do not use the structure of the reference copy in the output.",
    ],
    "copy": [
        "Make sure that the structure of each output follows the structure of the reference copy.",
        "Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.",
        "Use markdown format for each output.",
        "Make sure that the structure of each output follows the structure of the reference copy.",
    ],
    "structure": [
        "Make sure that the structure of each output follows the reference structure. Do not add any additional sentences or structures that are not in the reference structure.",
        "Use markdown format for each output.",
        "Do not include any part of the reference structure in the output.",
        "Make sure that the structure of each output follows the reference structure.",
    ],
}

# Instruction lines shared by all prompt variants (they follow the mode rules).
_PROMPT_COMMON_RULES = [
    "Do not use any of the excluded words in the output.",
    "Include all included words in the output.",
    "Do not hallucinate any information.",
    "Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.",
    "Use creative language in each output, do not use the common ways of starting product descriptions.",
    'Avoid common phrases and cliches such as "Step into something", "Elevate something", "Discover something", "Unleash something", "Embrace something", and similar phrases.',
    "For each version, try to write in different style.",
    "Put each item in the list on a separate line. Use '-' character to start each item.",
    "Rate the quality of each version based on the following criteria:",
]

# Scoring criteria per prompt variant.
_PROMPT_MODE_CRITERIA = {
    "both": [
        "- how well it follows the reference structure.",
        "- how well it follows the tone of voice, rythm, cadence and style of the reference copy.",
        "- how faithful it describes the product features.",
        "- how well it avoid the excluded words.",
        "- how well it includes the included words.",
        "- how creative the language is.",
    ],
    "copy": [
        "- how well it follows the tone of voice, rythm, cadence and style of the reference copy.",
        "- how faithful it describes the product features.",
        "- how well it avoid the excluded words.",
        "- how well it includes the included words.",
        "- how creative the language is",
    ],
    "structure": [
        "- how well it follows the reference structure.",
        "- how faithful it describes the product features.",
        "- how well it avoid the excluded words.",
        "- how well it includes the included words.",
        "- how creative the language is.",
    ],
}

# Output format requested from the model. The explanation line of version 2
# used to say "first product description" (copy-paste slip); it now says
# "second".
_PROMPT_JSON_FORMAT = """{
"versions": [
{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
},
{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the second product description,
"score": The score of the second product description
},
...
],
"best_version": {
"explanation": Explanation for why this version is the best,
"id": The id of the best version
}
}"""


def _build_prompt(mode, *, feature, detected_features, intended_use, structure, copy,
                  nversions, min_length, max_length, included_words, excluded_words):
    """Assemble the human prompt for one output slot as newline-joined lines."""
    references = []
    if mode in ("both", "structure"):
        references.append(f"Reference structure: {structure}")
    if mode in ("both", "copy"):
        references.append(f"Reference copy: {copy}")
    lines = [
        f"Generate {nversions} versions of the product description for a product with the following information.",
        "Write in a way that target the customer.",
        f"For each version, write at least {min_length} words and at most {max_length} words.",
        *_PROMPT_MODE_RULES[mode],
        *_PROMPT_COMMON_RULES,
        *_PROMPT_MODE_CRITERIA[mode],
        "The score should be a number between 0 and 10 with 10 being the best quality.",
        "Return the result in the following JSON format:",
        _PROMPT_JSON_FORMAT,
        "Make sure that the output is in JSON format, no extra text should be included in the output.",
        "Product information:",
        f"Key features: {feature}",
        f"{detected_features}",
        f"Intended use: {intended_use}",
        *references,
        f"Included words: {included_words}",
        f"Excluded words: {excluded_words}",
    ]
    return "\n".join(lines)


def _score_of(version):
    """Return a version's score as a float, or 0.0 when it is missing/invalid."""
    try:
        return float(version.get("score", 0))
    except (TypeError, ValueError):
        return 0.0


def _select_best_version(jr, debug):
    """Return the text of the version the model marked as best.

    Falls back to the highest-scoring version when the announced id matches
    none of the versions, and to "" when there are no versions at all.
    """
    versions = jr.get("versions") or []
    if not versions:
        return ""
    best_info = jr.get("best_version") or {}
    # Compare ids as strings: models sometimes return "1" instead of 1.
    best_id = str(best_info.get("id"))
    chosen = next((v for v in versions if str(v.get("id")) == best_id), None)
    if chosen is None:
        chosen = max(versions, key=_score_of)
    text = post_process(chosen["content"])
    if debug:
        explanation = best_info.get("explanation", chosen.get("explanation", ""))
        text += f"\n\nDebug info:\n\nScore: {chosen.get('score')}\n\nExplanation: {explanation}"
    return text


def generate(*data):
    """Generate product descriptions for every visible output slot.

    Args:
        *data: The first ten values are, in order: feature text, gallery
            images, garment type, model name, temperature, number of versions,
            excluded words, included words, uploaded glossary file and the
            debug flag. The remaining values are the alternating
            structure / reference-copy texts of the output slots.

    Returns:
        ``(result_md, result_json)``: the markdown shown in the output panel
        (best description per slot, alt texts and inline images) and a dict
        with ``"outputs"`` (raw model JSON when debug is on, otherwise the
        chosen descriptions) and ``"alt_text"`` (image path -> alt text).
    """
    # NOTE(review): `visible` is module-level state changed by the add/remove
    # buttons, so it is shared by everything running in this process.
    global visible
    print("visible", visible)
    nargs = 10
    (feature, image, garment_type, model, temperature, nversions,
     excluded_words, included_words, glossary_upload, debug) = data[:nargs]
    struct_ref = data[nargs:]
    print(f"{feature=}")
    print(f"{image=}")
    print(f"{garment_type=}")
    print(f"{model=}")
    print(f"{temperature=}")
    print(f"{excluded_words=}")
    print(f"{included_words=}")
    print(f"{debug=}")
    print(f"{glossary_upload=}")

    glossary = None
    if glossary_upload is not None:
        glossary = build_glossary(glossary_upload)
    chat = get_model(model, temperature=temperature)

    # Never look past the slots that were actually passed in.
    n_slots = min(visible + 1, len(struct_ref) // 2)
    _types, struct_languages = parse_structure(struct_ref)
    copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(n_slots)], model=chat)
    languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang)
                 for struct_lang, copy_lang in zip(struct_languages, copy_languages)]

    image_features, base64_images = detect_features(image, garment_type)
    detected_features = ""
    intended_use = ""
    alt_texts = []
    # detect_features returns "" on failure, so only a non-empty dict counts.
    if isinstance(image_features, dict) and image_features:
        alt_texts = image_features.get("alt_text", [])
        if isinstance(alt_texts, str):
            alt_texts = [alt_texts]
        alt_texts = [str(alt) for alt in alt_texts]
        detected_features = ", ".join(map(str, image_features.get("features", [])))
        intended_use = ", ".join(map(str, image_features.get("intended_use", [])))
    print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")

    batch = []
    min_length = 0
    max_length = 150
    mode_labels = {
        "both": "Using both copy and structure",
        "copy": "Using copy",
        "structure": "Using structure",
    }
    for i in range(n_slots):
        structure = struct_ref[2 * i]
        copy = struct_ref[2 * i + 1]
        has_structure = bool(structure.strip())
        has_copy = bool(copy.strip())
        if not (has_structure or has_copy):
            # Empty slot: nothing to generate for it.
            continue
        if has_structure and has_copy:
            mode = "both"
        elif has_copy:
            mode = "copy"
        else:
            mode = "structure"
        print('------------')
        print(mode_labels[mode])
        prompt = _build_prompt(
            mode,
            feature=feature,
            detected_features=detected_features,
            intended_use=intended_use,
            structure=structure,
            copy=copy,
            nversions=nversions,
            min_length=min_length,
            max_length=max_length,
            included_words=included_words,
            excluded_words=excluded_words,
        )
        messages = [
            SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
            HumanMessage(content=prompt),
        ]
        print(messages[1].content)
        print('------------')
        batch.append(messages)

    # Skip the model call when every slot was empty.
    response = chat.batch(batch) if batch else []
    print(response)
    parser = JsonOutputParser()
    jresponse = [parser.parse(msg.content) for msg in response]
    descriptions = []
    for jr in jresponse:
        print(f'{jr=}')
        descriptions.append(_select_best_version(jr, debug))

    # Only the first description is rewritten with the glossary.
    if descriptions and glossary is not None:
        print("\n\nRewriting with glossary")
        rewrite = glossary_rewrite(chat=chat, glossary=glossary, text=descriptions[0])
        if rewrite != "":
            descriptions[0] = "Original:\n\n" + descriptions[0] + "\n\nRewritten:\n\n" + rewrite
        print("\n\nDone rewriting with glossary\n\n")

    md_content = "\n\n---\n\n".join(descriptions)
    alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
    alt_text_dict = {k[0]: v for (k, v) in zip(image or [], alt_texts)} if len(alt_texts) > 0 else {}
    result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
    result_md = md_content + alt_texts_str + '\n'.join([f'![Product photo](data:image/png;base64,{base64_image} "{alt_text}")' if base64_image != "" else "" for (base64_image, alt_text) in zip(base64_images, alt_texts)])
    return result_md, result_json
# Index of the last visible structure / reference-copy slot (0 = one slot).
visible = 0


def add_output_click(*struct_ref):
    """Reveal the next structure / reference-copy slot.

    Args:
        *struct_ref: Current textbox values, alternating structure and
            reference copy.

    Returns:
        A list of ``gr.Textbox`` updates in the same alternating order, with
        one more slot visible. When every slot is already visible the input
        values are returned unchanged and the counter is not incremented, so
        it can never point past the last slot.
    """
    global visible
    print("Adding output ", visible)
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    n_slots = min(len(structure_texts), len(reference_texts))
    if visible >= n_slots - 1:
        # All slots are shown already; growing further would make
        # generate() index past the available textboxes.
        return struct_ref
    visible += 1
    structures = [gr.Textbox(label=f"Structure {i}", lines=10, value=structure_texts[i], interactive=True, visible=i <= visible) for i in range(n_slots)]
    references = [gr.Textbox(label=f"Reference copy {i}", lines=3, value=reference_texts[i], interactive=True, visible=i <= visible) for i in range(n_slots)]
    # Interleave back into structure0, copy0, structure1, copy1, ...
    return [val for pair in zip(structures, references) for val in pair]
def remove_output_click(*struct_ref):
    """Hide the last visible structure / reference-copy slot.

    With only the first slot visible the inputs are returned untouched.
    Otherwise the visible counter is decremented and ten pairs of textbox
    updates are returned; slots that are now hidden get their text cleared.
    """
    global visible
    print("Removing output", visible)
    if visible == 0:
        return struct_ref
    visible -= 1
    structure_texts, reference_texts = struct_ref[::2], struct_ref[1::2]

    structures = []
    for i in range(10):
        shown = i <= visible
        structures.append(gr.Textbox(label=f"Structure {i}", lines=10, value=structure_texts[i] if shown else "", interactive=True, visible=shown))

    references = []
    for i in range(10):
        shown = i <= visible
        references.append(gr.Textbox(label=f"Reference copy {i}", lines=3, value=reference_texts[i] if shown else "", interactive=True, visible=shown))

    # Interleave: structure0, copy0, structure1, copy1, ...
    interleaved = []
    for structure_box, reference_box in zip(structures, references):
        interleaved.extend((structure_box, reference_box))
    return interleaved
# def show_advanced(model, temperature):
# model = gr.Dropdown(models, value="gpt-4-turbo", interactive=True, label="Model", visible=True)
# temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
# return model, temperature
# Build the Gradio interface: inputs on the left, structure / reference-copy
# slots in the middle, generated output on the right.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order — confirm it matches the intended layout.
with gr.Blocks() as demo:
    # Start with a single visible output slot.
    visible = 0
    print("Building interface")
    with gr.Row():
        with gr.Column():
            feature = gr.Textbox(label="Features", value=feature_text, lines=3, interactive=True)
            image = gr.Gallery(label="Images")
            garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
            with gr.Accordion(label="Advanced Options", open=False):
                model = gr.Dropdown(models, value="claude-3-5-sonnet-20241022", interactive=True, label="Model", visible=True)
                temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
                # `step` must be a number; the builtin type `int` was passed before.
                nversions = gr.Slider(minimum=1, maximum=10, value=5, step=1, interactive=True, label="Number of versions", visible=True)
                excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
                # Extension filters need the leading dot (".csv").
                glossary_upload = gr.UploadButton(label="Upload Glossary", interactive=True, file_types=[".csv"])
                debug = gr.Checkbox(label="Debug", interactive=True, value=True)
            with gr.Row():
                submit = gr.Button(value="Submit")
        with gr.Column():
            # Ten structure / reference-copy pairs, flattened as
            # structure0, copy0, structure1, copy1, ...; only the first pair
            # is visible initially.
            struct_ref = [val for i in range(10) for val in
                          [gr.Textbox(label=f"Structure {i}", lines=10, value="", interactive=True, visible=i <= visible),
                           gr.Textbox(label=f"Reference copy {i}", lines=3, value="", interactive=True, visible=i <= visible)]]
            struct_ref[0].value = structure_text
            struct_ref[1].value = reference_text
            with gr.Row():
                add_output = gr.Button(value="Add Output")
                remove_output = gr.Button(value="Remove Output")
            add_output.click(add_output_click, inputs=struct_ref, outputs=struct_ref)
            remove_output.click(remove_output_click, inputs=struct_ref, outputs=struct_ref)
        with gr.Column():
            md_output = gr.Markdown(label="Output", show_label=True, line_breaks=True)
            json_output = gr.JSON(label="JSON Output")
    # The first ten inputs must stay in the order generate() unpacks them.
    submit.click(generate, inputs=[feature, image, garment_type, model, temperature, nversions,
                                   excluded_words, included_words, glossary_upload, debug, *struct_ref],
                 outputs=[md_output, json_output])
import bcrypt
def authf(username, password):
    """Check a username / password pair against ``passwords.txt``.

    Each line of the file is expected to hold a username and a bcrypt hash
    separated by whitespace. Blank or malformed lines are skipped so that one
    bad line does not lock out the users listed after it.

    Returns:
        True when a line matches the username and the password verifies
        against its hash; False otherwise, including when the file cannot be
        read (the error is printed).
    """
    try:
        with open("passwords.txt", "r") as f:
            for line in f:
                fields = line.split()
                if len(fields) != 2:
                    # Not a "<user> <hash>" line: ignore it and keep scanning.
                    continue
                u, p = fields
                if u == username and bcrypt.checkpw(password.encode('utf-8'), p.encode('utf-8')):
                    return True
    except Exception as e:
        # Authentication must fail closed rather than raise.
        print("Error reading password", e)
        traceback.print_exc()
    return False
# Start the Gradio server only when this file is run as a script.
if __name__ == '__main__':
    # Alternative launch that listens on all interfaces and authenticates
    # through authf(); currently disabled.
    # demo.launch(server_name="0.0.0.0", auth=authf)
    demo.launch()