tunght committed on
Commit
7cbb3f8
·
1 Parent(s): 2fa5073

add language detection, improve the ability to follow the reference copy

Browse files
Files changed (1) hide show
  1. app.py +105 -8
app.py CHANGED
@@ -153,6 +153,65 @@ def detect_features(image_paths, garment_type, language="English"):
153
  return "", []
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  def generate(*data):
157
  global visible
158
  print("visible", visible)
@@ -171,6 +230,17 @@ def generate(*data):
171
  print(f"{glossary=}")
172
  print(f"{struct_ref=}")
173
 
 
 
 
 
 
 
 
 
 
 
 
174
  image_features, base64_images = detect_features(image, garment_type)
175
  detected_features = ""
176
  intended_use = ""
@@ -180,12 +250,6 @@ def generate(*data):
180
  detected_features = ", ".join(image_features["features"])
181
  intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
182
  print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
183
- if model.startswith("gpt"):
184
- chat = ChatOpenAI(model=model)
185
- elif model.startswith("claude"):
186
- chat = ChatAnthropic(model_name=model, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])
187
- else:
188
- chat = ChatGroq(model_name=model, api_key=os.environ["GROQ_API_KEY"])
189
 
190
  batch = []
191
  for i in range(visible + 1):
@@ -195,18 +259,19 @@ def generate(*data):
195
  # visible = i
196
  # break
197
  messages = [
198
- SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites."""),
199
  HumanMessage(content=f"""Write a product description with the following features.
200
  Make sure that the description follows the structure of the reference structure.
201
  Make sure to use markdown format for the output.
202
  Make sure that the entire output is written entirely in language defined in the reference structure.
 
203
  Use language that is suitable for the type of document specified in the reference structure.
204
  Use a consistent tone of voice throughout the text.
205
  If the reference text is not empty, write the product description in the tone of voice of the reference text.
206
- Make sure to output the product description only, do not include any preceeding text like "Here is your product description".
207
  Do not include any part of the reference structure in the output.
208
  Do not use any of the excluded words in the output.
209
  Make sure to include all of the included words in the output.
 
210
 
211
  {feature + detected_features}
212
  {intended_use}
@@ -221,6 +286,38 @@ def generate(*data):
221
  response = chat.batch(batch, temperature=temperature)
222
  print(response)
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  description = "\n---\n".join([msg.content for msg in response])
226
  md_content = description
 
153
  return "", []
154
 
155
 
156
+ import re
157
def parse_structure(struct_ref):
    """Extract the declared document type and language from each structure text.

    ``struct_ref`` is a flat sequence in which even indices hold a reference
    structure and odd indices hold the matching reference copy; only the
    even-indexed entries of complete pairs are inspected here.

    Each structure is split into runs of ASCII letters, spaces and newlines.
    When a run contains the word "language" (or "type"), the run that
    immediately follows it is taken as the value.

    Args:
        struct_ref: Flat sequence of alternating structure / reference texts.

    Returns:
        A ``(types, languages)`` tuple of lists with one entry per complete
        structure/reference pair. An entry stays ``"_"`` when no non-empty
        value was found for it.
    """
    pair_count = len(struct_ref) // 2
    languages = ["_"] * pair_count
    types = ["_"] * pair_count

    # Iterate over complete pairs only, so a trailing unpaired structure can
    # never index past the end of the result lists.
    for slot in range(pair_count):
        parts = re.findall(r"[a-zA-Z\n ]+", struct_ref[2 * slot])
        # Pair every run with its successor; a keyword sitting in the final
        # run has no value after it and is simply not visited.
        for part, following in zip(parts, parts[1:]):
            value = following.strip()
            if not value:
                # Keep the "_" placeholder instead of recording an empty value.
                continue
            keyword = part.lower()
            if "language" in keyword:
                languages[slot] = value
            if "type" in keyword:
                types[slot] = value

    return types, languages
171
+
172
+
173
def detect_language(texts, model):
    """Ask the chat model which language each non-blank text is written in.

    Detection is best effort: any exception raised while building the prompts
    or calling the model is printed together with its traceback, and whatever
    was resolved so far is returned.

    Args:
        texts: Sequence of strings. Blank or ``None`` entries are skipped.
        model: Object exposing ``batch(list_of_message_lists)`` whose results
            carry the answer in a ``content`` attribute.

    Returns:
        A list with the same length as ``texts``. Each position holds the
        model's answer for that text, or ``"_"`` when the text was skipped,
        the answer was blank, or detection failed.
    """
    langs = ["_"] * len(texts)

    try:
        messages = []
        lang_map = {}  # index in `texts` -> index in `messages`
        for i, text in enumerate(texts):
            # Skip missing/blank texts individually so one bad entry does not
            # abort detection for the others.
            if not text or not text.strip():
                continue
            lang_mess = [HumanMessage(content=f"What is the language of the following text? Output the language only. "
                                              f"\n ```{text}```")]
            print(f"{lang_mess=}")
            lang_map[i] = len(messages)
            messages.append(lang_mess)

        if not messages:
            # Nothing to detect: avoid a pointless model round-trip.
            return langs

        detected_langs = model.batch(messages)
        print(f"{detected_langs=}")
        for k, v in lang_map.items():
            answer = detected_langs[v].content
            if isinstance(answer, str):
                # Surrounding whitespace would leak into later prompts; a
                # blank answer is treated as "not detected".
                answer = answer.strip() or "_"
            langs[k] = answer
    except Exception as e:
        print(e.__class__, e)
        traceback.print_exc()

    return langs
195
+
196
+
197
def get_language(struct_lang, copy_lang):
    """Choose the output language, preferring the structure over the copy.

    ``"_"`` is the placeholder for "not available". The first argument that is
    not the placeholder wins; when both are placeholders the result is
    ``"English"``.

    Args:
        struct_lang: Language declared in the reference structure, or ``"_"``.
        copy_lang: Language detected from the reference copy, or ``"_"``.

    Returns:
        The selected language name.
    """
    for candidate in (struct_lang, copy_lang):
        if candidate != "_":
            return candidate
    return "English"
203
+
204
+
205
def get_model(model_name):
    """Instantiate the chat client that matches the prefix of ``model_name``.

    Names starting with ``"gpt"`` use ``ChatOpenAI``, names starting with
    ``"claude"`` use ``ChatAnthropic`` (key read from the
    ``ANTHROPIC_API_KEY`` environment variable), and every other name uses
    ``ChatGroq`` (key read from ``GROQ_API_KEY``).

    Args:
        model_name: Identifier of the model to use.

    Returns:
        The constructed chat client.

    Raises:
        KeyError: If the environment variable required by the selected
            Anthropic or Groq client is not set.
    """
    if model_name.startswith("gpt"):
        return ChatOpenAI(model=model_name)
    if model_name.startswith("claude"):
        return ChatAnthropic(model_name=model_name, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])
    return ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"])
213
+
214
+
215
  def generate(*data):
216
  global visible
217
  print("visible", visible)
 
230
  print(f"{glossary=}")
231
  print(f"{struct_ref=}")
232
 
233
+ chat = get_model(model)
234
+
235
+ types, struct_languages = parse_structure(struct_ref)
236
+ copy_languages = detect_language([struct_ref[2 * i + 1] for i in range(visible + 1)], model=chat)
237
+ languages = [get_language(struct_lang=struct_lang, copy_lang=copy_lang) for struct_lang, copy_lang in zip(struct_languages, copy_languages)]
238
+
239
+ print("Struct languages--------------------------------------------\n", struct_languages)
240
+ print("Copy languages--------------------------------------------\n", copy_languages)
241
+ print("Languages--------------------------------------------\n", languages)
242
+ # print("Types--------------------------------------------", types)
243
+
244
  image_features, base64_images = detect_features(image, garment_type)
245
  detected_features = ""
246
  intended_use = ""
 
250
  detected_features = ", ".join(image_features["features"])
251
  intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
252
  print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
 
 
 
 
 
 
253
 
254
  batch = []
255
  for i in range(visible + 1):
 
259
  # visible = i
260
  # break
261
  messages = [
262
+ SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
263
  HumanMessage(content=f"""Write a product description with the following features.
264
  Make sure that the description follows the structure of the reference structure.
265
  Make sure to use markdown format for the output.
266
  Make sure that the entire output is written entirely in language defined in the reference structure.
267
+ Make sure to output the product description only, do not include any preceeding text like "Here is your product description".
268
  Use language that is suitable for the type of document specified in the reference structure.
269
  Use a consistent tone of voice throughout the text.
270
  If the reference text is not empty, write the product description in the tone of voice of the reference text.
 
271
  Do not include any part of the reference structure in the output.
272
  Do not use any of the excluded words in the output.
273
  Make sure to include all of the included words in the output.
274
+ Do not hallucinate any information.
275
 
276
  {feature + detected_features}
277
  {intended_use}
 
286
  response = chat.batch(batch, temperature=temperature)
287
  print(response)
288
 
289
+ batch = []
290
+ rewrite_map = {}
291
+ for i in range(visible + 1):
292
+ structure = struct_ref[2 * i]
293
+ reference = struct_ref[2 * i + 1]
294
+ if len(reference.strip()) > 0:
295
+ messages = [
296
+ SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. You write in {languages[i]} language."""),
297
+ HumanMessage(content=f"""Rewrite the following product description in the style and tone of voice
298
+ of the reference product description.
299
+ Make sure that the structure and length of the output is similar to the reference product description.
300
+ Make sure that the output is written in {languages[i]} language.
301
+ Output the product description in markdown format.
302
+
303
+ Product description to rewirte:
304
+ ```{response[i].content}```
305
+
306
+ Reference product description:
307
+ ```{reference}```
308
+ """)
309
+ ]
310
+ batch.append(messages)
311
+ rewrite_map[i] = len(batch) - 1
312
+
313
+ print("Rewrite_map", rewrite_map)
314
+ print("Rewriting")
315
+ re_response = chat.batch(batch, temperature=temperature)
316
+ for i in range(len(re_response)):
317
+ print(f"Original: {response[i].content}")
318
+ print(f"Rewritten: {re_response[i].content}")
319
+ response = [re_response[rewrite_map[i]] if i in rewrite_map else response[i] for i in range(visible + 1)]
320
+ print("Done rewriting")
321
 
322
  description = "\n---\n".join([msg.content for msg in response])
323
  md_content = description