tunght committed on
Commit
8312c1b
·
1 Parent(s): 37fb1eb

Add gemini-2.0-flash-thinking

Browse files
Files changed (3) hide show
  1. app.py +279 -150
  2. requirements.txt +2 -1
  3. str2escaped.py +32 -1
app.py CHANGED
@@ -9,8 +9,9 @@ from langchain_openai.chat_models import ChatOpenAI
9
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
10
  from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
11
  from langchain_google_genai import ChatGoogleGenerativeAI
12
- from langchain_groq import ChatGroq
13
  import openai
 
14
 
15
  from langchain import hub
16
  from langchain_chroma import Chroma
@@ -100,18 +101,15 @@ languages = ["American English",
100
  "Polish",
101
  "Portuguese"]
102
 
103
- models = ["gpt-4-turbo",
104
- "gpt-4o",
105
- "gpt-3.5-turbo",
106
  "claude-3-7-sonnet-latest",
107
- "claude-3-sonnet-20240229",
108
- "claude-3-opus-20240229",
109
  "claude-3-5-sonnet-20240620",
110
  "claude-3-5-sonnet-20241022",
111
- "gemini-1.5-pro"
112
- #"llama3-70b-8192",
113
  ]
114
 
 
 
115
  openai.api_key = os.environ["OPENAI_API_KEY"]
116
 
117
  struct_copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
@@ -124,8 +122,8 @@ Do not include any part of the reference structure in the output.
124
  The structure of the output should follow the reference structure.
125
  Do not use the structure of the reference copy in the output.
126
  Do not use any of the excluded words in the output.
127
- Include all included words in the output.
128
- Make sure to use product features and intended use in the output.
129
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
130
  Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
131
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
@@ -165,16 +163,27 @@ Return the result in the following JSON format:
165
  }}
166
  Make sure that the output is in JSON format, no extra text should be included in the output.
167
 
168
- Product information:
169
- Key features: {key_features}
 
 
170
 
171
- Reference structure: {structure}
 
 
172
 
173
- Reference copy: {copy}
 
 
174
 
175
- Included words: {included_phrases}
 
 
176
 
177
- Excluded words: {excluded_phrases}"""
 
 
 
178
 
179
 
180
  copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
@@ -185,8 +194,8 @@ Make sure to use the tone of voice, rythm, cadence and style of the reference co
185
  Use markdown format for each output.
186
  Make sure that the structure of each output follows the structure of the reference copy.
187
  Do not use any of the excluded words in the output.
188
- Include all included words in the output.
189
- Make sure to use product features and intended use in the output.
190
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
191
  Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
192
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
@@ -225,14 +234,23 @@ Return the result in the following JSON format:
225
  }}
226
  Make sure that the output is in JSON format, no extra text should be included in the output.
227
 
228
- Product information:
229
- Key features: {key_features}
 
 
230
 
231
- Reference copy: {copy}
 
 
232
 
233
- Included words: {included_phrases}
 
 
234
 
235
- Excluded words: {excluded_phrases}"""
 
 
 
236
 
237
 
238
  struct_prompt = """Generate {nversions} versions of the product description for a product with the following information.
@@ -243,8 +261,8 @@ Use markdown format for each output.
243
  Do not include any part of the reference structure in the output.
244
  Make sure that the structure of each output follows the reference structure.
245
  Do not use any of the excluded words in the output.
246
- Include all included words in the output.
247
- Make sure to use product features and intended use in the output.
248
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
249
  Note that the reference structure should be used for structure only, do not use any part of the reference structure in the output.
250
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
@@ -283,14 +301,82 @@ Return the result in the following JSON format:
283
  }}
284
  Make sure that the output is in JSON format, no extra text should be included in the output.
285
 
286
- Product information:
287
- Key features: {key_features}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
- Reference structure: {structure}
 
 
290
 
291
- Included words: {included_phrases}
 
 
292
 
293
- Excluded words: {excluded_phrases}"""
 
 
 
 
 
 
 
294
 
295
 
296
  improve_structure_prompt = """You are given a structure for a product description.
@@ -299,6 +385,36 @@ Return the reformatted structure only. Do not add any preceding or trailing char
299
 
300
  <structure>/n{structure}</structure>"""
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
  import base64
304
  import requests
@@ -435,10 +551,11 @@ def get_language(struct_lang, copy_lang):
435
 
436
  def post_process(text: str, guidance_prompt: str, language: str, chat: ChatOpenAI):
437
  messages = [
438
- SystemMessage(content=f"""You are a helpful assistant that edit documents based on the guidlines provided. Make sure to write in {language} language."""),
 
439
  HumanMessage(content=f"""Given the following product description, your task is to
440
  make minimal modification to the product description such that the resulting description
441
- follows the rules defined in the guidlines. Make sure to preserve the structure of the
442
  original text as much as possible. Do not modify the structure of the original text.
443
  Do not change the language of the original text.
444
  Output only the modified text in markdown format.
@@ -451,6 +568,11 @@ Guidelines:
451
  {guidance_prompt}""")
452
  ]
453
 
 
 
 
 
 
454
  response = chat.invoke(messages, temperature=0.0)
455
  text = response.content
456
  return text
@@ -460,11 +582,17 @@ def get_model(model_name, temperature=0.0):
460
  if model_name.startswith("gpt"):
461
  chat = ChatOpenAI(model=model_name, max_tokens=4096, temperature=temperature)
462
  elif model_name.startswith("claude"):
463
- chat = ChatAnthropic(model_name=model_name, anthropic_api_key=os.environ["ANTHROPIC_API_KEY"], max_tokens_to_sample=4096, temperature=temperature)
 
 
 
464
  elif model_name.startswith("gemini"):
465
- chat = ChatGoogleGenerativeAI(model=model_name, api_key=os.environ["GOOGLE_API_KEY"], temperature=temperature)
 
 
466
  else:
467
- chat = ChatGroq(model_name=model_name, api_key=os.environ["GROQ_API_KEY"], temperature=temperature)
 
468
  return chat
469
 
470
 
@@ -481,39 +609,6 @@ def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
481
  return retriever
482
 
483
 
484
- def glossary_rewrite(chat: ChatOpenAI, glossary: VectorStoreRetriever, text: str):
485
- try:
486
- terms = glossary.invoke(input=text)
487
- print("\n".join([d.page_content for d in terms]))
488
- glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
489
-
490
- if len(terms) > 0:
491
- messages = [
492
- SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in English language."""),
493
- HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
494
- Preserve the original text as much as possible.
495
- Replace the terms in original text that match the definition with the corresponding terms in the glossary.
496
- Output only the rewritten text in markdown format.
497
-
498
- Terms, Definitions
499
- {glossary_str}
500
-
501
- Text to rewrite:
502
- {text}
503
- """),]
504
-
505
- print(f"HumanMessage={messages[1].content}")
506
- response = chat.invoke(messages, temperature=0.0)
507
- print(f"Response=\n{response.content}")
508
- return response.content
509
- except Exception as e:
510
- print(e.__class__, e)
511
- traceback.print_exc()
512
- terms = []
513
-
514
- return ""
515
-
516
-
517
  def improve_structure(chat: ChatOpenAI, structure: str):
518
  messages = [
519
  HumanMessage(content=improve_structure_prompt.format(structure=structure)),]
@@ -523,6 +618,28 @@ def improve_structure(chat: ChatOpenAI, structure: str):
523
  return response.content
524
 
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  def generate(*data):
527
  global visible
528
  print("visible", visible)
@@ -539,13 +656,6 @@ def generate(*data):
539
  print(f"{excluded_phrases=}")
540
  print(f"{included_phrases=}")
541
  print(f"{debug=}")
542
- # print(f"{glossary=}")
543
- print(f"{glossary_upload=}")
544
- # print(f"{struct_ref=}")
545
-
546
- glossary = None
547
- if glossary_upload is not None:
548
- glossary = build_glossary(glossary_upload)
549
 
550
  chat = get_model(model, temperature=temperature)
551
 
@@ -571,97 +681,116 @@ def generate(*data):
571
 
572
  key_features = key_features + ", " + detected_features + "\nIntended uses: " + intended_use
573
 
574
- # if glossary:
575
- # print("Getting terms")
576
- # terms = glossary.invoke(input=feature + detected_features)
577
- # for term in terms:
578
- # print(term)
579
-
580
  batch = []
581
  min_length = 0
582
  max_length = 150
 
583
  for i in range(visible + 1):
584
  structure = struct_ref[2 * i]
585
  copy = struct_ref[2 * i + 1]
586
- if len((structure + copy).strip()) > 0:
587
- if len(copy.strip()) > 0 and len(structure.strip()) > 0:
588
- print('------------')
589
- print("Using both copy and structure")
590
- # print("Improving structure")
591
- # structure = improve_structure(chat=chat, structure=structure)
592
- messages = [
593
- SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
594
- HumanMessage(content=struct_copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
595
-
596
- elif len(copy.strip()) > 0:
597
- print('------------')
598
- print("Using copy")
599
- messages = [
600
- SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
601
- HumanMessage(content=copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
602
- print(messages[1].content)
603
- print('------------')
604
-
605
- elif len(structure.strip()) > 0:
606
- print('------------')
607
- print("Using structure")
608
- # print("Improving structure")
609
- # structure = improve_structure(chat=chat, structure=structure)
610
  messages = [
611
- SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
612
- HumanMessage(content=struct_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
613
- print(messages[1].content)
614
- print('------------')
615
- batch.append(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
 
617
  descriptions = ""
618
-
619
- response = chat.batch(batch)
620
- print(response)
621
 
622
- parser = JsonOutputParser()
623
- jresponse = [parser.parse(msg.content) for msg in response]
624
  descriptions = []
625
  descriptions_post = []
626
- for i, jr in enumerate(jresponse):
627
- print(f'{jr=}')
628
- bestid = jr["best_version"]["id"]
629
- for d in jr["versions"]:
630
- if d["id"] == bestid:
631
- bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
632
- bests = d["score"]
633
- break
634
- # bests = 0
635
- # bestd = ""
636
- # for d in jr:
637
- # print(f'{d["score"]=}, {d["id"]=}, {bests=}')
638
- # if d["score"] > bests:
639
- # bests = d["score"]
640
- # bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
641
- # elif d["score"] == bests and random.random() > 0.5:
642
- # bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
643
- # if d["id"] == bestid:
644
- # bests = d["score"]
645
- # bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
646
- # break
647
-
648
- bestd_post = post_process(text=bestd, guidance_prompt=guidance_prompt, language=languages[i], chat=chat)
 
 
 
 
 
 
 
 
 
 
 
 
649
 
650
- descriptions.append(bestd)
651
- descriptions_post.append(bestd_post)
652
 
653
- # print("\n\nRewriting with glossary")
654
- # rewrite = glossary_rewrite(chat=chat, glossary=glossary, text=descriptions[0])
655
- # if rewrite != "":
656
- # descriptions[0] = "Original:\n\n" + descriptions[0] + "\n\nRewritten:\n\n" + rewrite
657
- # print("\n\nDone rewriting with glossary\n\n")
658
 
659
- md_content = "\n\n---\n\n".join(descriptions)
660
 
661
- alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
662
 
663
- alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
664
- result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
665
 
666
  # post_content = post_process(text=md_content, guidance_prompt=guidance_prompt, language=languages, chat=chat)
667
 
@@ -721,7 +850,7 @@ with gr.Blocks() as demo:
721
  garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
722
  # language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
723
  with gr.Accordion(label="Advanced Options", open=False):
724
- model = gr.Dropdown(models, value="claude-3-7-sonnet-latest", interactive=True, label="Model", visible=True)
725
  temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
726
  nversions = gr.Slider(minimum=1, maximum=10, value=5, step=int, interactive=True, label="Number of versions", visible=True)
727
  excluded_phrases = gr.Textbox(label="Excluded words", interactive=True, lines=2)
 
9
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
10
  from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
11
  from langchain_google_genai import ChatGoogleGenerativeAI
12
+ # from langchain_groq import ChatGroq
13
  import openai
14
+ import google.generativeai as genai
15
 
16
  from langchain import hub
17
  from langchain_chroma import Chroma
 
101
  "Polish",
102
  "Portuguese"]
103
 
104
+ models = ["gpt-4o",
 
 
105
  "claude-3-7-sonnet-latest",
 
 
106
  "claude-3-5-sonnet-20240620",
107
  "claude-3-5-sonnet-20241022",
108
+ "gemini-2.0-flash-thinking-exp-01-21",
 
109
  ]
110
 
111
+ default_model = "gemini-2.0-flash-thinking-exp-01-21"
112
+
113
  openai.api_key = os.environ["OPENAI_API_KEY"]
114
 
115
  struct_copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
 
122
  The structure of the output should follow the reference structure.
123
  Do not use the structure of the reference copy in the output.
124
  Do not use any of the excluded words in the output.
125
+ Try to include included words in the output when relevant.
126
+ Use the relevant information from the product features and intended use in the output.
127
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
128
  Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
129
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
 
163
  }}
164
  Make sure that the output is in JSON format, no extra text should be included in the output.
165
 
166
+ <product_information>
167
+ <key_features>
168
+ {key_features}
169
+ </key_features>
170
 
171
+ <reference_structure>
172
+ {structure}
173
+ </reference_structure>
174
 
175
+ <reference_copy>
176
+ {copy}
177
+ </reference_copy>
178
 
179
+ <included_phrases>
180
+ {included_phrases}
181
+ </included_phrases>
182
 
183
+ <excluded_phrases>
184
+ {excluded_phrases}
185
+ </excluded_phrases>
186
+ </product_information>"""
187
 
188
 
189
  copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
 
194
  Use markdown format for each output.
195
  Make sure that the structure of each output follows the structure of the reference copy.
196
  Do not use any of the excluded words in the output.
197
+ Try to include included words in the output when relevant.
198
+ Use the relevant information from the product features and intended use in the output.
199
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
200
  Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
201
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
 
234
  }}
235
  Make sure that the output is in JSON format, no extra text should be included in the output.
236
 
237
+ <product_information>
238
+ <key_features>
239
+ {key_features}
240
+ </key_features>
241
 
242
+ <reference_copy>
243
+ {copy}
244
+ </reference_copy>
245
 
246
+ <included_phrases>
247
+ {included_phrases}
248
+ </included_phrases>
249
 
250
+ <excluded_phrases>
251
+ {excluded_phrases}
252
+ </excluded_phrases>
253
+ </product_information>"""
254
 
255
 
256
  struct_prompt = """Generate {nversions} versions of the product description for a product with the following information.
 
261
  Do not include any part of the reference structure in the output.
262
  Make sure that the structure of each output follows the reference structure.
263
  Do not use any of the excluded words in the output.
264
+ Try to include included words in the output when relevant.
265
+ Use the relevant information from the product features and intended use in the output.
266
  Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
267
  Note that the reference structure should be used for structure only, do not use any part of the reference structure in the output.
268
  Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
 
301
  }}
302
  Make sure that the output is in JSON format, no extra text should be included in the output.
303
 
304
+ <product_information>
305
+ <key_features>
306
+ {key_features}
307
+ </key_features>
308
+
309
+ <reference_structure>
310
+ {structure}
311
+ </reference_structure>
312
+
313
+ <included_phrases>
314
+ {included_phrases}
315
+ </included_phrases>
316
+
317
+ <excluded_phrases>
318
+ {excluded_phrases}
319
+ </excluded_phrases>
320
+ </product_information>"""
321
+
322
+ evaluation_prompt = """You will be given information of a product and a list of product descriptions.
323
+ Evaluate the quality of the product descriptions based on the following criteria:
324
+ - how faithfully it describes the product features.
325
+ - how well it follows the reference structure.
326
+ - how well it follows the tone of voice, rhythm, cadence and style of the reference copy.
327
+ - how well it avoids the excluded words.
328
+ - how well it includes the included words.
329
+ - how creative the language is.
330
+ Give a score between 0 and 10 for each product description based on the above criteria.
331
+ Return the result in the following JSON format:
332
+ {{
333
+ "versions": [
334
+ {{
335
+ "id": 1,
336
+ "content": The first product description,
337
+ "explanation": A less than 20 word explanation of the score of the first product description,
338
+ "score": The score of the first product description
339
+ }},
340
+ {{
341
+ "id": 2,
342
+ "content": The second product description,
343
+ "explanation": A less than 20 word explanation of the score of the second product description,
344
+ "score": The score of the second product description
345
+ }},
346
+ ...
347
+ ],
348
+ "best_version": {{
349
+ "explanation": Explanation for why this version is the best,
350
+ "id": The id of the best version
351
+ }}
352
+ }}
353
+ Make sure that the output is in JSON format, no extra text should be included in the output.
354
+
355
+ <product_information>
356
+ <key_features>
357
+ {key_features}
358
+ </key_features>
359
+
360
+ <reference_structure>
361
+ {structure}
362
+ </reference_structure>
363
 
364
+ <reference_copy>
365
+ {copy}
366
+ </reference_copy>
367
 
368
+ <included_phrases>
369
+ {included_phrases}
370
+ </included_phrases>
371
 
372
+ <excluded_phrases>
373
+ {excluded_phrases}
374
+ </excluded_phrases>
375
+ </product_information>
376
+
377
+ <product_descriptions>
378
+ {product_descriptions}
379
+ </product_descriptions>"""
380
 
381
 
382
  improve_structure_prompt = """You are given a structure for a product description.
 
385
 
386
  <structure>/n{structure}</structure>"""
387
 
388
+ gemini_prompt = """You are given information of a product, a reference structure, and a reference copy.
389
+ Please analyze the structure, make a plan on how to follow the structure correctly, and write a product
390
+ description for the product. Use the tone of voice of the reference copy for the generated description.
391
+ Write from {min_length} to {max_length} words.
392
+ Do not hallucinate, do not add information that is not in the product information.
393
+ Try your best to avoid using the excluded words and phrases.
394
+ Try your best to include the included words and phrases.
395
+ Do not enclose the output in html tags, quotes, braces, brackets or anything.
396
+ Return the product description only.
397
+
398
+ <product_information>
399
+ {key_features}
400
+ </product_information>
401
+
402
+ <reference_structure>
403
+ {structure}
404
+ </reference_structure>
405
+
406
+ <reference_copy>
407
+ {copy}
408
+ </reference_copy>
409
+
410
+ <excluded_phrases>
411
+ {excluded_phrases}
412
+ </excluded_phrases>
413
+
414
+ <included_phrases>
415
+ {included_phrases}
416
+ </included_phrases>"""
417
+
418
 
419
  import base64
420
  import requests
 
551
 
552
  def post_process(text: str, guidance_prompt: str, language: str, chat: ChatOpenAI):
553
  messages = [
554
+ SystemMessage(content=f"""You are a helpful assistant that edit documents based on the guidelines provided.
555
+ Make sure to write in {language} language."""),
556
  HumanMessage(content=f"""Given the following product description, your task is to
557
  make minimal modification to the product description such that the resulting description
558
+ follows the rules defined in the guidelines. Make sure to preserve the structure of the
559
  original text as much as possible. Do not modify the structure of the original text.
560
  Do not change the language of the original text.
561
  Output only the modified text in markdown format.
 
568
  {guidance_prompt}""")
569
  ]
570
 
571
+ if chat is None:
572
+ chat = ChatAnthropic(model_name="claude-3-7-sonnet-latest",
573
+ anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
574
+ max_tokens_to_sample=4096,
575
+ temperature=0.0)
576
  response = chat.invoke(messages, temperature=0.0)
577
  text = response.content
578
  return text
 
582
  if model_name.startswith("gpt"):
583
  chat = ChatOpenAI(model=model_name, max_tokens=4096, temperature=temperature)
584
  elif model_name.startswith("claude"):
585
+ chat = ChatAnthropic(model_name=model_name,
586
+ anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
587
+ max_tokens_to_sample=4096,
588
+ temperature=temperature)
589
  elif model_name.startswith("gemini"):
590
+ # chat = ChatGoogleGenerativeAI(model=model_name,
591
+ # api_key=os.environ["GOOGLE_API_KEY"])
592
+ chat = genai.GenerativeModel(model_name)
593
  else:
594
+ chat = None
595
+ raise ValueError(f"Model {model_name} not supported")
596
  return chat
597
 
598
 
 
609
  return retriever
610
 
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  def improve_structure(chat: ChatOpenAI, structure: str):
613
  messages = [
614
  HumanMessage(content=improve_structure_prompt.format(structure=structure)),]
 
618
  return response.content
619
 
620
 
621
+ def evaluate(descriptions,
622
+ reference_structure,
623
+ reference_copy,
624
+ key_features,
625
+ included_phrases,
626
+ excluded_phrases,
627
+ language,
628
+ chat):
629
+ messages = [
630
+ SystemMessage(content=f"""You are a helpful assistant that evaluates product descriptions based on the guidelines provided. Make sure to write in {language} language."""),
631
+ HumanMessage(content=evaluation_prompt.format(key_features=key_features,
632
+ structure=reference_structure,
633
+ copy=reference_copy,
634
+ included_phrases=included_phrases,
635
+ excluded_phrases=excluded_phrases,
636
+ product_descriptions=descriptions)),]
637
+
638
+ response = chat.invoke(messages, temperature=0.0)
639
+ print(response)
640
+ return response
641
+
642
+
643
  def generate(*data):
644
  global visible
645
  print("visible", visible)
 
656
  print(f"{excluded_phrases=}")
657
  print(f"{included_phrases=}")
658
  print(f"{debug=}")
 
 
 
 
 
 
 
659
 
660
  chat = get_model(model, temperature=temperature)
661
 
 
681
 
682
  key_features = key_features + ", " + detected_features + "\nIntended uses: " + intended_use
683
 
 
 
 
 
 
 
684
  batch = []
685
  min_length = 0
686
  max_length = 150
687
+ response = []
688
  for i in range(visible + 1):
689
  structure = struct_ref[2 * i]
690
  copy = struct_ref[2 * i + 1]
691
+
692
+ if model.startswith("gemini"):
693
+ if len((structure + copy).strip()) > 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
  messages = [
695
+ gemini_prompt.format(min_length=min_length,
696
+ max_length=max_length,
697
+ key_features=key_features,
698
+ structure=structure,
699
+ copy=copy,
700
+ included_phrases=included_phrases,
701
+ excluded_phrases=excluded_phrases)
702
+ ]
703
+
704
+ batch.append(messages)
705
+
706
+ ri = chat.generate_content(messages)
707
+ print("Gemini response: ", ri)
708
+ response.append(ri)
709
+ else:
710
+ if len((structure + copy).strip()) > 0:
711
+ if len(copy.strip()) > 0 and len(structure.strip()) > 0:
712
+ print('------------')
713
+ print("Using both copy and structure")
714
+ # print("Improving structure")
715
+ # structure = improve_structure(chat=chat, structure=structure)
716
+ messages = [
717
+ SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
718
+ HumanMessage(content=struct_copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
719
+
720
+ elif len(copy.strip()) > 0:
721
+ print('------------')
722
+ print("Using copy")
723
+ messages = [
724
+ SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
725
+ HumanMessage(content=copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
726
+ print(messages[1].content)
727
+ print('------------')
728
+
729
+ elif len(structure.strip()) > 0:
730
+ print('------------')
731
+ print("Using structure")
732
+ # print("Improving structure")
733
+ # structure = improve_structure(chat=chat, structure=structure)
734
+ messages = [
735
+ SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
736
+ HumanMessage(content=struct_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
737
+ print(messages[1].content)
738
+ print('------------')
739
+ batch.append(messages)
740
+
741
+ response = chat.batch(batch)
742
 
743
  descriptions = ""
 
 
 
744
 
 
 
745
  descriptions = []
746
  descriptions_post = []
747
+
748
+ if model.startswith("gemini"):
749
+ descriptions = [msg.text for msg in response]
750
+ descriptions_post = [post_process(text=desc,
751
+ guidance_prompt=guidance_prompt,
752
+ language=languages[i],
753
+ chat=None) for i, desc in enumerate(descriptions)]
754
+
755
+ alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
756
+ alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
757
+ result_json = {"outputs": descriptions, "alt_text": alt_text_dict}
758
+ else:
759
+ parser = JsonOutputParser()
760
+ jresponse = [parser.parse(msg.content) for msg in response]
761
+ for i, jr in enumerate(jresponse):
762
+ print(f'{jr=}')
763
+ bestid = jr["best_version"]["id"]
764
+ for d in jr["versions"]:
765
+ if d["id"] == bestid:
766
+ bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
767
+ bests = d["score"]
768
+ break
769
+
770
+ evaluated = evaluate(descriptions=jr["versions"],
771
+ reference_structure=struct_ref[2 * i],
772
+ reference_copy=struct_ref[2 * i + 1],
773
+ key_features=key_features,
774
+ included_phrases=included_phrases,
775
+ excluded_phrases=excluded_phrases,
776
+ language=languages[i], chat=chat)
777
+ print(f'{evaluated=}')
778
+
779
+ bestd_post = post_process(text=bestd,
780
+ guidance_prompt=guidance_prompt,
781
+ language=languages[i], chat=chat)
782
 
783
+ descriptions.append(bestd)
784
+ descriptions_post.append(bestd_post)
785
 
786
+ alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
787
+ alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
 
 
 
788
 
789
+ result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
790
 
791
+ md_content = "\n\n---\n\n".join(descriptions)
792
 
793
+
 
794
 
795
  # post_content = post_process(text=md_content, guidance_prompt=guidance_prompt, language=languages, chat=chat)
796
 
 
850
  garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
851
  # language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
852
  with gr.Accordion(label="Advanced Options", open=False):
853
+ model = gr.Dropdown(models, value=default_model, interactive=True, label="Model", visible=True)
854
  temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
855
  nversions = gr.Slider(minimum=1, maximum=10, value=5, step=int, interactive=True, label="Number of versions", visible=True)
856
  excluded_phrases = gr.Textbox(label="Excluded words", interactive=True, lines=2)
requirements.txt CHANGED
@@ -8,4 +8,5 @@ bcrypt
8
  langchain_chroma
9
  langchainhub
10
  langchain_community
11
- langchain-google-genai
 
 
8
  langchain_chroma
9
  langchainhub
10
  langchain_community
11
+ langchain-google-genai
12
+ google-generativeai
str2escaped.py CHANGED
@@ -203,9 +203,40 @@ Return the result in in the following JSON format without any preceding or trail
203
  "category": the category of the garment, for example 'dress', 'shirt', 'pants', 'shoes', etc
204
  }}"""
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  import codecs
207
  if __name__ == "__main__":
208
- print(str2escaped(detect_feature))
209
  # print()
210
  # print('"write_struct_copy": "' + str2escaped(both).strip() + '",')
211
  # print()
 
203
  "category": the category of the garment, for example 'dress', 'shirt', 'pants', 'shoes', etc
204
  }}"""
205
 
206
+
207
+ gemini_prompt = """You are given information of a product, a reference structure, and a reference copy.
208
+ Please analyze the structure, make a plan on how to follow the structure correctly, and write a product
209
+ description for the product. Use the tone of voice of the reference copy for the generated description.
210
+ Write from {min_length} to {max_length} words.
211
+ Do not hallucinate, do not add information that is not in the product information.
212
+ Try your best to avoid using the excluded words and phrases.
213
+ Try your best to include the included words and phrases.
214
+ Do not enclose the output in html tags, quotes, braces, brackets or anything.
215
+ Return the product description only.
216
+
217
+ <product_information>
218
+ {key_features}
219
+ </product_information>
220
+
221
+ <reference_structure>
222
+ {structure}
223
+ </reference_structure>
224
+
225
+ <reference_copy>
226
+ {copy}
227
+ </reference_copy>
228
+
229
+ <excluded_phrases>
230
+ {excluded_phrases}
231
+ </excluded_phrases>
232
+
233
+ <included_phrases>
234
+ {included_phrases}
235
+ </included_phrases>"""
236
+
237
  import codecs
238
  if __name__ == "__main__":
239
+ print(str2escaped(gemini_prompt))
240
  # print()
241
  # print('"write_struct_copy": "' + str2escaped(both).strip() + '",')
242
  # print()