# (Hugging Face Spaces page header removed: "Spaces: Paused" — not part of the program)
| import random | |
| import traceback | |
| import gradio as gr | |
| import numpy as np | |
| import os | |
| from langchain_core.output_parsers import JsonOutputParser | |
| from langchain_openai.chat_models import ChatOpenAI | |
| from langchain.schema import HumanMessage, SystemMessage, AIMessage | |
| from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages | |
| from langchain_groq import ChatGroq | |
| import openai | |
| from langchain import hub | |
| from langchain_chroma import Chroma | |
| from langchain_community.document_loaders import WebBaseLoader, CSVLoader | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_core.vectorstores import VectorStoreRetriever | |
# ---------------------------------------------------------------------------
# Demo defaults and option lists
# ---------------------------------------------------------------------------
# Example product brief pre-filled into the "Features" textbox.
feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
# Default garment type; "" / "all" are mapped to the generic word "garment" downstream.
garment_type = "all"
reference_text = ""
# Jinja-style "structure" templates: each "{{ field | tone hints }}" placeholder
# tells the model which section to write and in what style.  The optional
# leading "[type: ..., language=...]" header is parsed by parse_structure().
structure_text = \
"""# Headline {{ headline | inspiring, bold, action-oriented, max 8 words }}
## Introduction
{{ introduction_paragraph | motivational, passionate, 2-3 sentences }}
## Features and Benefits
{% for feature in features %}
### Feature {{ loop.index }}: {{ feature.name | dynamic, direct, 5-6 words }}
{{ feature.details | energetic, clear, 3-4 sentences }}
{% endfor %}
## Technical Specifications
{{ technical_specs | informative, to the point, concise list format }}
"""
structure_text_1 = """[type: UK website, style=true, language=English]
{{ introduction_paragraph | motivational, passionate, 1-2 sentences }}
{% for feature in features as bulleted list %}
{{ feature.description | dynamic, direct, 3-6 words }}
{% endfor %}
{{ technical_specs | informative, to the point, concise list format }}"""
structure_text_2 = """[type: Japanese newsletter, style=true, language=Japanese]
{{ introduction_paragraph | motivational, passionate, 3-6 sentences }}"""
# Output languages for the (currently commented-out) language dropdown.
languages = ["American English",
             "British English",
             "German",
             "French",
             "Chinese",
             "Spanish",
             "Dutch",
             "Italian",
             "Japanese",
             "Polish",
             "Portuguese"]
# Chat models selectable in the Advanced Options accordion.
models = ["gpt-4-turbo",
          "gpt-4o",
          "gpt-3.5-turbo",
          "claude-3-sonnet-20240229",
          "claude-3-opus-20240229",
          "claude-3-5-sonnet-20240620"
          #"llama3-70b-8192",
          ]
# Raises KeyError at import time if OPENAI_API_KEY is unset -- intentional fail-fast.
openai.api_key = os.environ["OPENAI_API_KEY"]
import base64
import requests
# OpenAI API Key
| # Function to encode the image | |
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
| import json | |
def get_json(text: str):
    """Parse a model response into a dict, tolerating Markdown code fences.

    The model is asked for bare JSON but sometimes wraps it in ``` / ```json
    fences.  Strip only an anchored leading/trailing fence -- blanket
    ``replace('json', '')`` would also corrupt any "json" occurring inside the
    payload itself.

    Returns the parsed dict, or an empty features/intended_use/alt_text dict
    when the model reported "No garment detected".  Raises json.JSONDecodeError
    on any other non-JSON text (callers catch broadly).
    """
    text = text.strip()
    # Remove an opening fence (``` or ```json) plus following whitespace/newline.
    text = re.sub(r'^```(?:json)?\s*', '', text)
    # Remove a closing fence at the very end, then trim.
    text = re.sub(r'```$', '', text).strip()
    if text.startswith("No garment detected"):
        return {
            "features": [],
            "intended_use": [],
            "alt_text": []
        }
    return json.loads(text)
def detect_features(image_paths, garment_type, language="English"):
    """Ask GPT-4o (vision) to describe the garment shown in the gallery images.

    image_paths: gradio Gallery value -- a list of (path, caption) pairs; only
        the path element is used.  # assumes Gallery tuple format -- TODO confirm
    garment_type: free-text garment type; "" / "all" fall back to "garment".
    language: language for the generated alt texts.

    Returns (features_dict, base64_images) where features_dict has keys
    "features", "intended_use" and "alt_text", or ("", []) on any failure
    (errors are printed, never raised).
    """
    # Path to your image
    # image_path = "path_to_your_image.jpg"
    # Getting the base64 string
    try:
        base64_images = [encode_image(image_path[0]) for image_path in image_paths]
        if garment_type == "" or garment_type == "all":
            garment_type = "garment"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai.api_key}"
        }
        # Raw REST payload (not langchain): one text prompt followed by one
        # image_url part per uploaded photo.
        payload = {
            "model": "gpt-4o",
            # "model": "gpt-4-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"""Describe the features of the {garment_type} in the photos in less than 100 words.
What is the intended use of the {garment_type} in this image, use at most 5 words for intended use?
Generate alt text for each of the images.
Make sure to output the alt text in {language} language.
If the photo does not contain a garment, return 'No garment detected'.
If the photo contains a garment, return the result in in the following JSON format without any preceding or trailing text:
{{
"features": [list of comma separated features],
"intended_use": [list of comma separated intended uses],
"alt_text": [list of alt text for image 1, alt text for image 2]
}}"""
                        },
                    ] + [{
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    } for base64_image in base64_images]
                }
            ],
            "temperature": 0.0,
            "max_tokens": 300
        }
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        print(response)
        response = response.json()
        print("image features", response["choices"][0]['message']['content'])
        # get_json tolerates Markdown code fences around the returned JSON.
        jresponse = get_json(response["choices"][0]['message']['content'])
        return jresponse, base64_images
    except Exception as e:
        # Best-effort: missing images, network errors or bad JSON all yield ("", []).
        print(e.__class__, e)
        traceback.print_exc()
        return "", []
| import re | |
def parse_structure(struct_ref):
    """Extract the declared type and language from each structure textbox.

    ``struct_ref`` alternates (structure, reference-copy) strings; only the
    even-indexed structure entries are scanned for a
    ``[type: ..., language=...]``-style header.

    Returns ``(types, languages)``, one entry per pair, with "_" as the
    placeholder when nothing is declared.
    """
    n_pairs = (len(struct_ref) + 1) // 2  # tolerate a trailing unpaired entry
    languages = ["_"] * n_pairs
    types = ["_"] * n_pairs
    for si in range(0, len(struct_ref), 2):
        # Split on anything that is not a letter/space/newline, e.g.
        # "[type: UK website, language=English]" ->
        # ["type", " UK website", ..., " language", "English"]
        parts = re.findall(r'[a-zA-Z\n ]+', struct_ref[si])
        for idx, part in enumerate(parts):
            if idx + 1 >= len(parts):
                break  # keyword with no following value -- nothing to read
            if "language" in part.lower():
                languages[si // 2] = parts[idx + 1].strip()
            if "type" in part.lower():
                types[si // 2] = parts[idx + 1].strip()
    return types, languages
def detect_language(texts, model):
    """Ask the chat *model* which language each non-empty text is written in.

    Returns a list aligned with *texts*; "_" marks empty entries, and the
    whole result stays at "_" if the model call fails (errors are logged,
    never raised).
    """
    langs = ["_"] * len(texts)
    try:
        messages = []
        lang_map = {}  # input index -> position in the batched prompt list
        for i, text in enumerate(texts):
            if not text.strip():
                continue  # nothing to detect for blank entries
            lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                              f"\n ```{text}```")]
            print(f"{lang_mess=}")
            lang_map[i] = len(messages)
            messages.append(lang_mess)
        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        for text_idx, batch_idx in lang_map.items():
            langs[text_idx] = detected_langs[batch_idx].content
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
    return langs
def get_language(struct_lang, copy_lang):
    """Pick the output language: structure header wins, then detected copy language, then English."""
    for candidate in (struct_lang, copy_lang):
        if candidate != "_":
            return candidate
    return "English"
def get_model(model_name):
    """Instantiate the chat backend matching *model_name*.

    "gpt*" -> OpenAI, "claude*" -> Anthropic, anything else -> Groq.
    Anthropic/Groq read their API keys from the environment (KeyError if unset).
    """
    if model_name.startswith("gpt"):
        return ChatOpenAI(model=model_name, max_tokens=8192)
    if model_name.startswith("claude"):
        return ChatAnthropic(model_name=model_name,
                             anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
                             max_tokens_to_sample=4096)
    return ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"])
def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
    """Load a CSV glossary and index it in an in-memory Chroma vector store.

    glossary_file: path to a comma-separated, double-quoted CSV file.
    fieldnames: currently unused -- the CSVLoader argument that would consume
        it is commented out below.

    Returns a retriever over the embedded rows.  Embedding uses OpenAI, so
    OPENAI_API_KEY must be set and this makes network calls.
    """
    loader = CSVLoader(file_path=glossary_file,
                       csv_args={"delimiter": ",",
                                 "quotechar": '"'})
    # "fieldnames": fieldnames})
    docs = loader.load()
    vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
    retriever = vectorstore.as_retriever()
    return retriever
def glossary_rewrite(glossary: VectorStoreRetriever, text: str):
    """Look up glossary terms relevant to *text* and build a rewrite prompt.

    NOTE(review): work in progress -- the messages are constructed and printed
    but never sent to a model, and nothing is returned; the only call site
    (in generate()) is commented out.
    """
    try:
        terms = glossary.invoke(input=text)
        print("\n".join([d.page_content for d in terms]))
        # One "term. definition" line per retrieved glossary row.
        glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()
        terms = []
    if len(terms) > 0:
        messages = [
            SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in English language."""),
            HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
Preserve the original text as much as possible.
Replace the terms in original text that match the definition with the corresponding terms in the glossary.
Terms, Definitions
{glossary_str}
"""),
        ]
        print(f"HumanMessage={messages[1].content}")
def generate(*data):
    """Gradio submit handler: generate product descriptions for every visible
    structure/reference pair and return (markdown, json) for the output panes.

    ``data`` is the flat gradio input list: the first ``nargs`` entries are the
    fixed controls, the remainder alternates (structure, reference copy)
    textbox values.  For each visible pair a prompt is built (structure+copy,
    copy-only, or structure-only), all prompts are batched through the selected
    chat model, and the model's self-rated best version is kept per pair.

    Reads the module-level ``visible`` counter maintained by the add/remove
    output buttons.
    """
    global visible
    print("visible", visible)
    nargs = 9
    feature, image, garment_type, model, temperature, excluded_words, included_words, glossary_upload, debug = data[:nargs]
    struct_ref = data[nargs:]
    print(f"{feature=}")
    print(f"{image=}")
    print(f"{garment_type=}")
    print(f"{model=}")
    print(f"{temperature=}")
    print(f"{excluded_words=}")
    print(f"{included_words=}")
    print(f"{debug=}")
    print(f"{glossary_upload=}")
    glossary = None
    if glossary_upload is not None:
        glossary = build_glossary(glossary_upload)
    chat = get_model(model)
    # Language precedence per pair: explicit [language=...] header, else the
    # language detected from the reference copy, else English.
    types, struct_languages = parse_structure(struct_ref)
    copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(visible + 1)], model=chat)
    languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang) for struct_lang, copy_lang in zip(struct_languages, copy_languages)]
    image_features, base64_images = detect_features(image, garment_type)
    detected_features = ""
    intended_use = ""
    alt_texts = []
    # detect_features returns "" on failure, so the len() check covers both cases.
    if image_features is not None and len(image_features) > 0:
        alt_texts = image_features["alt_text"]
        detected_features = ", ".join(image_features["features"])
        intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
    print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
    if glossary:
        print("Getting terms")
        terms = glossary.invoke(input=feature + detected_features)
        for term in terms:
            print(term)
    batch = []
    for i in range(visible + 1):
        structure = struct_ref[2 * i]
        copy = struct_ref[2 * i + 1]
        if len((structure + copy).strip()) > 0:
            if len(copy.strip()) > 0 and len(structure.strip()) > 0:
                print('------------')
                print("Using both copy and structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
            elif len(copy.strip()) > 0:
                print('------------')
                print("Using copy")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the structure of the reference copy.
Make sure to use the tone of voice, rythm, cadence and style of the reference copy for each output.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            elif len(structure.strip()) > 0:
                print('------------')
                print("Using structure")
                messages = [
                    SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
                    HumanMessage(content=f"""Generate 5 versions of the product description for a product with the following information.
Write in a way that target the customer.
Make sure that the structure of each output follows the reference structure.
Use markdown format for each output.
Do not include any part of the reference structure in the output.
Do not use any of the excluded words in the output.
Include all included words in the output.
Do not hallucinate any information.
Use creative language in each output.
Rate the quality of each version based on the following criteria:
- how well it follows the reference tone of voice, rythm, cadence and style.
- how well it follows the reference structure.
- how faithful it describes the product features.
- how well it avoid the excluded words.
- how well it includes the included words.
- how creative the language is.
The score should be a number between 0 and 10 with 10 being the best quality.
Return the result in the following JSON format:
{{
"versions": [
{{
"id": 1,
"content": The first product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the first product description
}},
{{
"id": 2,
"content": The second product description,
"explanation": A less than 20 word explanation of the score of the first product description,
"score": The score of the second product description
}},
...
],
"best_version": {{
"id": The id of the best version,
"explanation": Explanation for why this version is the best
}}
}}
Make sure that the output is in JSON format, no extra text should be included in the output.
Product information:
Key features: {feature + detected_features}
Intended use: {intended_use}
Reference structure: {structure}
Reference copy: {copy}
Included words: {included_words}
Excluded words: {excluded_words}"""),]
                print(messages[1].content)
                print('------------')
            batch.append(messages)
    response = chat.batch(batch, temperature=temperature)
    print(response)
    parser = JsonOutputParser()
    jresponse = [parser.parse(msg.content) for msg in response]
    descriptions = []
    for jr in jresponse:
        print(f'{jr=}')
        bestid = jr["best_version"]["id"]
        # Default to an empty entry so a dangling best_version id from the
        # model cannot raise NameError below.
        bestd = ""
        for d in jr["versions"]:
            if d["id"] == bestid:
                bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
                break
        descriptions.append(bestd)
    md_content = "\n\n---\n\n".join(descriptions)
    alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
    alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
    result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
    # NOTE(review): the f'' below looks like an image-markdown literal whose
    # contents were lost in formatting -- both branches currently contribute
    # empty strings, so the join is a no-op.  Confirm against the original.
    result_md = md_content + alt_texts_str + '\n'.join([f'' if base64_image != "" else "" for (base64_image, alt_text) in zip(base64_images, alt_texts)])
    return result_md, result_json
# Index of the last visible structure/reference textbox pair (0-based),
# mutated by the Add/Remove Output buttons and read by generate().
visible = 0
def add_output_click(*struct_ref):
    """Reveal one more structure/reference textbox pair (up to the 10 that exist).

    Receives and returns the flat interleaved list of all 20 textboxes so
    gradio can re-render their visibility.
    """
    global visible
    print("Adding output ", visible)
    # Cap at the last pair: letting `visible` grow past 9 would make
    # generate() index struct_ref[2 * i] out of range.
    visible = min(visible + 1, 9)
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = [gr.Textbox(label=f"Structure {i}", lines=10, value=structure_texts[i], interactive=True, visible=i <= visible) for i in range(10)]
    references = [gr.Textbox(label=f"Reference copy {i}", lines=3, value=reference_texts[i], interactive=True, visible=i <= visible) for i in range(10)]
    struct_ref = [val for pair in zip(structures, references) for val in pair]
    return struct_ref
def remove_output_click(*struct_ref):
    """Hide the last visible structure/reference textbox pair and clear its text.

    No-op when only the first pair is shown.  Receives and returns the flat
    interleaved list of all 20 textboxes.
    """
    global visible
    print("Removing output", visible)
    if visible == 0:
        return struct_ref
    visible -= 1
    structure_texts = struct_ref[::2]
    reference_texts = struct_ref[1::2]
    structures = []
    references = []
    for i in range(10):
        shown = i <= visible
        structures.append(gr.Textbox(label=f"Structure {i}", lines=10,
                                     value=structure_texts[i] if shown else "",
                                     interactive=True, visible=shown))
        references.append(gr.Textbox(label=f"Reference copy {i}", lines=3,
                                     value=reference_texts[i] if shown else "",
                                     interactive=True, visible=shown))
    interleaved = [widget for pair in zip(structures, references) for widget in pair]
    return interleaved
def show_advanced(model, temperature):
    """Return freshly visible model/temperature widgets (the passed-in values are ignored).

    Only referenced by a commented-out click handler below.
    """
    model_dropdown = gr.Dropdown(models, value="gpt-4-turbo", interactive=True, label="Model", visible=True)
    temperature_slider = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
    return model_dropdown, temperature_slider
with gr.Blocks() as demo:
    # `visible` is the index of the last shown structure/reference pair; the
    # Add/Remove Output buttons mutate it as a module-level global.
    visible = 0
    print("Building interface")
    with gr.Row():
        with gr.Column():
            # Left column: product inputs and generation options.
            feature = gr.Textbox(label="Features", value=feature_text, lines=3, interactive=True)
            image = gr.Gallery(label="Images")
            garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
            # language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
            with gr.Accordion(label="Advanced Options", open=False):
                model = gr.Dropdown(models, value="claude-3-5-sonnet-20240620", interactive=True, label="Model", visible=True)
                temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
                excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
                # glossary = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2,"static"), headers=["Description", "Way of writing"], label="Glossary", interactive=True)
                glossary_upload = gr.UploadButton(label="Upload Glossary", interactive=True, file_types=["csv"])
                debug = gr.Checkbox(label="Debug", interactive=True, value=True)
            with gr.Row():
                submit = gr.Button(value="Submit")
                # advanced = gr.Button(value="Advanced")
        with gr.Column():
            # Middle column: up to 10 (structure, reference copy) textbox pairs,
            # flattened into one interleaved list so they can be wired as *args.
            visible = 0
            struct_ref = [val for i in range(10) for val in
                          [gr.Textbox(label=f"Structure {i}", lines=10, value="", interactive=True, visible=i <= visible),
                           gr.Textbox(label=f"Reference copy {i}", lines=3, value="", interactive=True, visible=i <= visible)]]
            # Pre-fill the first structure with the UK-website example template.
            struct_ref[0].value = structure_text_1
            # struct_ref[2].value = structure_text_2
            with gr.Row():
                add_output = gr.Button(value="Add Output")
                remove_output = gr.Button(value="Remove Output")
            add_output.click(add_output_click, inputs=struct_ref, outputs=struct_ref)
            remove_output.click(remove_output_click, inputs=struct_ref, outputs=struct_ref)
        with gr.Column():
            # Right column: rendered markdown plus the raw JSON payload.
            md_output = gr.Markdown(label="Output", show_label=True)
            json_output = gr.JSON(label="JSON Output")
    submit.click(generate, inputs=[feature, image, garment_type, model, temperature,
                                   excluded_words, included_words, glossary_upload, debug, *struct_ref],
                 outputs=[md_output, json_output])
    # advanced.click(show_advanced, inputs=[], outputs=[model, temperature])
| import bcrypt | |
def authf(username, password):
    """Gradio auth callback: check a username/password against passwords.txt.

    passwords.txt holds one "username bcrypt-hash" pair per line.  Returns
    True on the first matching line, False on no match or any error (missing
    file, unreadable hash, ...).
    """
    try:
        with open("passwords.txt", "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 2:
                    # Skip blank/malformed lines instead of letting the unpack
                    # raise and abort the scan before later valid entries.
                    continue
                u, p = parts
                if u == username and bcrypt.checkpw(password.encode('utf-8'), p.encode('utf-8')):
                    return True
    except Exception as e:
        print("Error reading password", e)
        traceback.print_exc()
    return False
if __name__ == '__main__':
    # demo.launch(server_name="0.0.0.0", auth=authf)
    # Launches without authentication; swap to the line above to require
    # the bcrypt login backed by passwords.txt.
    demo.launch()