Spaces:
Paused
Paused
add language detection, improve the ability to follow the reference copy
Browse files
app.py
CHANGED
|
@@ -153,6 +153,65 @@ def detect_features(image_paths, garment_type, language="English"):
|
|
| 153 |
return "", []
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def generate(*data):
|
| 157 |
global visible
|
| 158 |
print("visible", visible)
|
|
@@ -171,6 +230,17 @@ def generate(*data):
|
|
| 171 |
print(f"{glossary=}")
|
| 172 |
print(f"{struct_ref=}")
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
image_features, base64_images = detect_features(image, garment_type)
|
| 175 |
detected_features = ""
|
| 176 |
intended_use = ""
|
|
@@ -180,12 +250,6 @@ def generate(*data):
|
|
| 180 |
detected_features = ", ".join(image_features["features"])
|
| 181 |
intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
|
| 182 |
print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
|
| 183 |
-
if model.startswith("gpt"):
|
| 184 |
-
chat = ChatOpenAI(model=model)
|
| 185 |
-
elif model.startswith("claude"):
|
| 186 |
-
chat = ChatAnthropic(model_name=model, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])
|
| 187 |
-
else:
|
| 188 |
-
chat = ChatGroq(model_name=model, api_key=os.environ["GROQ_API_KEY"])
|
| 189 |
|
| 190 |
batch = []
|
| 191 |
for i in range(visible + 1):
|
|
@@ -195,18 +259,19 @@ def generate(*data):
|
|
| 195 |
# visible = i
|
| 196 |
# break
|
| 197 |
messages = [
|
| 198 |
-
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites."""),
|
| 199 |
HumanMessage(content=f"""Write a product description with the following features.
|
| 200 |
Make sure that the description follows the structure of the reference structure.
|
| 201 |
Make sure to use markdown format for the output.
|
| 202 |
Make sure that the entire output is written entirely in language defined in the reference structure.
|
|
|
|
| 203 |
Use language that is suitable for the type of document specified in the reference structure.
|
| 204 |
Use a consistent tone of voice throughout the text.
|
| 205 |
If the reference text is not empty, write the product description in the tone of voice of the reference text.
|
| 206 |
-
Make sure to output the product description only, do not include any preceeding text like "Here is your product description".
|
| 207 |
Do not include any part of the reference structure in the output.
|
| 208 |
Do not use any of the excluded words in the output.
|
| 209 |
Make sure to include all of the included words in the output.
|
|
|
|
| 210 |
|
| 211 |
{feature + detected_features}
|
| 212 |
{intended_use}
|
|
@@ -221,6 +286,38 @@ def generate(*data):
|
|
| 221 |
response = chat.batch(batch, temperature=temperature)
|
| 222 |
print(response)
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
description = "\n---\n".join([msg.content for msg in response])
|
| 226 |
md_content = description
|
|
|
|
| 153 |
return "", []
|
| 154 |
|
| 155 |
|
| 156 |
+
import re
|
| 157 |
+
def parse_structure(struct_ref):
    """Pull the declared document type and language out of each structure text.

    ``struct_ref`` is read as a flat sequence in which every even index holds
    a structure description; odd indices are ignored here. Each structure is
    split into runs of letters, spaces and newlines, and the run that follows
    a run containing "language" (or "type") is taken as the value.

    Args:
        struct_ref: Flat sequence of texts; only even positions are parsed.

    Returns:
        A ``(types, languages)`` tuple of two lists with one entry per
        structure. An entry is ``"_"`` when the keyword was not found or had
        no value after it.
    """
    # Round up so a trailing structure without a partner entry still gets a
    # slot instead of causing an out-of-range write.
    pair_count = (len(struct_ref) + 1) // 2
    languages = ["_"] * pair_count
    types = ["_"] * pair_count

    for slot, structure in enumerate(struct_ref[::2]):
        # Any character outside letters/space/newline (":", "[", "," ...)
        # acts as a separator between fragments.
        parts = re.findall(r"[a-zA-Z\n ]+", structure or "")
        # Pairing each fragment with its successor stops before the last
        # fragment, so a keyword with nothing after it is simply ignored
        # rather than indexing past the end of the list.
        for part, following in zip(parts, parts[1:]):
            keyword = part.lower()
            if "language" in keyword:
                languages[slot] = following.strip()
            if "type" in keyword:
                types[slot] = following.strip()

    return types, languages
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def detect_language(texts, model):
    """Ask the chat model which language each non-blank text is written in.

    Args:
        texts: Sequence of strings; blank (or ``None``) entries are not sent
            to the model.
        model: Object exposing ``batch(list_of_message_lists)`` whose results
            carry a ``content`` attribute.

    Returns:
        A list the same length as ``texts``. Each entry is the model's answer
        for that text, or ``"_"`` when the text was blank, the answer was
        empty, or the model call failed.
    """
    langs = ["_"] * len(texts)

    # Positions of the entries that actually contain something to classify.
    pending = [i for i, text in enumerate(texts) if text and text.strip()]
    if not pending:
        # Nothing to detect: avoid a pointless model round-trip.
        return langs

    # Detection is best-effort: any failure is logged and the "_" placeholders
    # are returned so the caller can fall back to another language source.
    try:
        messages = []
        for i in pending:
            lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                              f"\n ```{texts[i]}```")]
            print(f"{lang_mess=}")
            messages.append(lang_mess)
        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        # Results come back in request order, so pair them with the original
        # positions of the texts that were sent.
        for i, detected in zip(pending, detected_langs):
            content = detected.content
            name = content.strip() if isinstance(content, str) else content
            # Keep the "_" sentinel when the model answered with nothing.
            if name:
                langs[i] = name
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()

    return langs
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def get_language(struct_lang, copy_lang):
    """Choose the output language, preferring the structure over the copy.

    Args:
        struct_lang: Language declared in the reference structure, or ``"_"``
            when none was found.
        copy_lang: Language detected from the reference copy, or ``"_"`` when
            none was detected.

    Returns:
        The first usable candidate (``struct_lang`` first, then
        ``copy_lang``) with surrounding whitespace removed, or ``"English"``
        when neither is usable. A candidate is unusable when it is the ``"_"``
        sentinel, blank, or not a string.
    """
    for candidate in (struct_lang, copy_lang):
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        # A blank value would otherwise end up as an empty language name in
        # the prompt, so treat it the same as the "_" sentinel.
        if cleaned and cleaned != "_":
            return cleaned
    return "English"
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def get_model(model_name):
    """Build the chat client that matches the given model name.

    Names starting with "gpt" get a ``ChatOpenAI`` client and names starting
    with "claude" get a ``ChatAnthropic`` client; every other name is handed
    to ``ChatGroq``. The Anthropic and Groq clients take their API key from
    the ``ANTHROPIC_API_KEY`` / ``GROQ_API_KEY`` environment variables.

    Args:
        model_name: Model identifier string.

    Returns:
        The constructed chat client.
    """
    if model_name.startswith("gpt"):
        return ChatOpenAI(model=model_name)
    if model_name.startswith("claude"):
        return ChatAnthropic(model_name=model_name, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])
    # Anything that is neither OpenAI nor Anthropic is routed to Groq.
    return ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"])
|
| 213 |
+
|
| 214 |
+
|
| 215 |
def generate(*data):
|
| 216 |
global visible
|
| 217 |
print("visible", visible)
|
|
|
|
| 230 |
print(f"{glossary=}")
|
| 231 |
print(f"{struct_ref=}")
|
| 232 |
|
| 233 |
+
chat = get_model(model)
|
| 234 |
+
|
| 235 |
+
types, struct_languages = parse_structure(struct_ref)
|
| 236 |
+
copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(visible + 1)], model=chat)
|
| 237 |
+
languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang) for struct_lang, copy_lang in zip(struct_languages, copy_languages)]
|
| 238 |
+
|
| 239 |
+
print("Struct languages--------------------------------------------\n", struct_languages)
|
| 240 |
+
print("Copy languages--------------------------------------------\n", copy_languages)
|
| 241 |
+
print("Languages--------------------------------------------\n", languages)
|
| 242 |
+
# print("Types--------------------------------------------", types)
|
| 243 |
+
|
| 244 |
image_features, base64_images = detect_features(image, garment_type)
|
| 245 |
detected_features = ""
|
| 246 |
intended_use = ""
|
|
|
|
| 250 |
detected_features = ", ".join(image_features["features"])
|
| 251 |
intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
|
| 252 |
print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
batch = []
|
| 255 |
for i in range(visible + 1):
|
|
|
|
| 259 |
# visible = i
|
| 260 |
# break
|
| 261 |
messages = [
|
| 262 |
+
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 263 |
HumanMessage(content=f"""Write a product description with the following features.
|
| 264 |
Make sure that the description follows the structure of the reference structure.
|
| 265 |
Make sure to use markdown format for the output.
|
| 266 |
Make sure that the entire output is written entirely in language defined in the reference structure.
|
| 267 |
+
Make sure to output the product description only, do not include any preceeding text like "Here is your product description".
|
| 268 |
Use language that is suitable for the type of document specified in the reference structure.
|
| 269 |
Use a consistent tone of voice throughout the text.
|
| 270 |
If the reference text is not empty, write the product description in the tone of voice of the reference text.
|
|
|
|
| 271 |
Do not include any part of the reference structure in the output.
|
| 272 |
Do not use any of the excluded words in the output.
|
| 273 |
Make sure to include all of the included words in the output.
|
| 274 |
+
Do not hallucinate any information.
|
| 275 |
|
| 276 |
{feature + detected_features}
|
| 277 |
{intended_use}
|
|
|
|
| 286 |
response = chat.batch(batch, temperature=temperature)
|
| 287 |
print(response)
|
| 288 |
|
| 289 |
+
batch = []
|
| 290 |
+
rewrite_map = {}
|
| 291 |
+
for i in range(visible + 1):
|
| 292 |
+
structure = struct_ref[2 * i]
|
| 293 |
+
reference = struct_ref[2 * i + 1]
|
| 294 |
+
if len(reference.strip()) > 0:
|
| 295 |
+
messages = [
|
| 296 |
+
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. You write in {languages[i]} language."""),
|
| 297 |
+
HumanMessage(content=f"""Rewrite the following product description in the style and tone of voice
|
| 298 |
+
of the reference product description.
|
| 299 |
+
Make sure that the structure and length of the output is similar to the reference product description.
|
| 300 |
+
Make sure that the output is written in {languages[i]} language.
|
| 301 |
+
Output the product description in markdown format.
|
| 302 |
+
|
| 303 |
+
Product description to rewirte:
|
| 304 |
+
```{response[i].content}```
|
| 305 |
+
|
| 306 |
+
Reference product description:
|
| 307 |
+
```{reference}```
|
| 308 |
+
""")
|
| 309 |
+
]
|
| 310 |
+
batch.append(messages)
|
| 311 |
+
rewrite_map[i] = len(batch) - 1
|
| 312 |
+
|
| 313 |
+
print("Rewrite_map", rewrite_map)
|
| 314 |
+
print("Rewriting")
|
| 315 |
+
re_response = chat.batch(batch, temperature=temperature)
|
| 316 |
+
for i in range(len(re_response)):
|
| 317 |
+
print(f"Original: {response[i].content}")
|
| 318 |
+
print(f"Rewritten: {re_response[i].content}")
|
| 319 |
+
response = [re_response[rewrite_map[i]] if i in rewrite_map else response[i] for i in range(visible + 1)]
|
| 320 |
+
print("Done rewriting")
|
| 321 |
|
| 322 |
description = "\n---\n".join([msg.content for msg in response])
|
| 323 |
md_content = description
|