Spaces:
Paused
Paused
Add gemini-2.0-flash-thinking
Browse files- app.py +279 -150
- requirements.txt +2 -1
- str2escaped.py +32 -1
app.py
CHANGED
|
@@ -9,8 +9,9 @@ from langchain_openai.chat_models import ChatOpenAI
|
|
| 9 |
from langchain.schema import HumanMessage, SystemMessage, AIMessage
|
| 10 |
from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
|
| 11 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 12 |
-
from langchain_groq import ChatGroq
|
| 13 |
import openai
|
|
|
|
| 14 |
|
| 15 |
from langchain import hub
|
| 16 |
from langchain_chroma import Chroma
|
|
@@ -100,18 +101,15 @@ languages = ["American English",
|
|
| 100 |
"Polish",
|
| 101 |
"Portuguese"]
|
| 102 |
|
| 103 |
-
models = ["gpt-
|
| 104 |
-
"gpt-4o",
|
| 105 |
-
"gpt-3.5-turbo",
|
| 106 |
"claude-3-7-sonnet-latest",
|
| 107 |
-
"claude-3-sonnet-20240229",
|
| 108 |
-
"claude-3-opus-20240229",
|
| 109 |
"claude-3-5-sonnet-20240620",
|
| 110 |
"claude-3-5-sonnet-20241022",
|
| 111 |
-
"gemini-
|
| 112 |
-
#"llama3-70b-8192",
|
| 113 |
]
|
| 114 |
|
|
|
|
|
|
|
| 115 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
| 116 |
|
| 117 |
struct_copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
@@ -124,8 +122,8 @@ Do not include any part of the reference structure in the output.
|
|
| 124 |
The structure of the output should follow the reference structure.
|
| 125 |
Do not use the structure of the reference copy in the output.
|
| 126 |
Do not use any of the excluded words in the output.
|
| 127 |
-
|
| 128 |
-
|
| 129 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 130 |
Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
|
| 131 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
@@ -165,16 +163,27 @@ Return the result in the following JSON format:
|
|
| 165 |
}}
|
| 166 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
|
| 180 |
copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
@@ -185,8 +194,8 @@ Make sure to use the tone of voice, rythm, cadence and style of the reference co
|
|
| 185 |
Use markdown format for each output.
|
| 186 |
Make sure that the structure of each output follows the structure of the reference copy.
|
| 187 |
Do not use any of the excluded words in the output.
|
| 188 |
-
|
| 189 |
-
|
| 190 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 191 |
Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
|
| 192 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
@@ -225,14 +234,23 @@ Return the result in the following JSON format:
|
|
| 225 |
}}
|
| 226 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
| 230 |
|
| 231 |
-
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
|
| 238 |
struct_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
@@ -243,8 +261,8 @@ Use markdown format for each output.
|
|
| 243 |
Do not include any part of the reference structure in the output.
|
| 244 |
Make sure that the structure of each output follows the reference structure.
|
| 245 |
Do not use any of the excluded words in the output.
|
| 246 |
-
|
| 247 |
-
|
| 248 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 249 |
Note that the reference structure should be used for structure only, do not use any part of the reference structure in the output.
|
| 250 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
@@ -283,14 +301,82 @@ Return the result in the following JSON format:
|
|
| 283 |
}}
|
| 284 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 285 |
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
-
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
|
| 296 |
improve_structure_prompt = """You are given a structure for a product description.
|
|
@@ -299,6 +385,36 @@ Return the reformatted structure only. Do not add any preceding or trailing char
|
|
| 299 |
|
| 300 |
<structure>/n{structure}</structure>"""
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
import base64
|
| 304 |
import requests
|
|
@@ -435,10 +551,11 @@ def get_language(struct_lang, copy_lang):
|
|
| 435 |
|
| 436 |
def post_process(text: str, guidance_prompt: str, language: str, chat: ChatOpenAI):
|
| 437 |
messages = [
|
| 438 |
-
SystemMessage(content=f"""You are a helpful assistant that edit documents based on the
|
|
|
|
| 439 |
HumanMessage(content=f"""Given the following product description, your task is to
|
| 440 |
make minimal modification to the product description such that the resulting description
|
| 441 |
-
follows the rules defined in the
|
| 442 |
original text as much as possible. Do not modify the structure of the original text.
|
| 443 |
Do not change the language of the original text.
|
| 444 |
Output only the modified text in markdown format.
|
|
@@ -451,6 +568,11 @@ Guidelines:
|
|
| 451 |
{guidance_prompt}""")
|
| 452 |
]
|
| 453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
response = chat.invoke(messages, temperature=0.0)
|
| 455 |
text = response.content
|
| 456 |
return text
|
|
@@ -460,11 +582,17 @@ def get_model(model_name, temperature=0.0):
|
|
| 460 |
if model_name.startswith("gpt"):
|
| 461 |
chat = ChatOpenAI(model=model_name, max_tokens=4096, temperature=temperature)
|
| 462 |
elif model_name.startswith("claude"):
|
| 463 |
-
chat = ChatAnthropic(model_name=model_name,
|
|
|
|
|
|
|
|
|
|
| 464 |
elif model_name.startswith("gemini"):
|
| 465 |
-
chat = ChatGoogleGenerativeAI(model=model_name,
|
|
|
|
|
|
|
| 466 |
else:
|
| 467 |
-
chat =
|
|
|
|
| 468 |
return chat
|
| 469 |
|
| 470 |
|
|
@@ -481,39 +609,6 @@ def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
|
|
| 481 |
return retriever
|
| 482 |
|
| 483 |
|
| 484 |
-
def glossary_rewrite(chat: ChatOpenAI, glossary: VectorStoreRetriever, text: str):
|
| 485 |
-
try:
|
| 486 |
-
terms = glossary.invoke(input=text)
|
| 487 |
-
print("\n".join([d.page_content for d in terms]))
|
| 488 |
-
glossary_str = "\n\n".join([d.page_content.replace('\n', '. ') for d in terms])
|
| 489 |
-
|
| 490 |
-
if len(terms) > 0:
|
| 491 |
-
messages = [
|
| 492 |
-
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in English language."""),
|
| 493 |
-
HumanMessage(content=f"""Rewrite the following text using the terms in the glossary.
|
| 494 |
-
Preserve the original text as much as possible.
|
| 495 |
-
Replace the terms in original text that match the definition with the corresponding terms in the glossary.
|
| 496 |
-
Output only the rewritten text in markdown format.
|
| 497 |
-
|
| 498 |
-
Terms, Definitions
|
| 499 |
-
{glossary_str}
|
| 500 |
-
|
| 501 |
-
Text to rewrite:
|
| 502 |
-
{text}
|
| 503 |
-
"""),]
|
| 504 |
-
|
| 505 |
-
print(f"HumanMessage={messages[1].content}")
|
| 506 |
-
response = chat.invoke(messages, temperature=0.0)
|
| 507 |
-
print(f"Response=\n{response.content}")
|
| 508 |
-
return response.content
|
| 509 |
-
except Exception as e:
|
| 510 |
-
print(e.__class__, e)
|
| 511 |
-
traceback.print_exc()
|
| 512 |
-
terms = []
|
| 513 |
-
|
| 514 |
-
return ""
|
| 515 |
-
|
| 516 |
-
|
| 517 |
def improve_structure(chat: ChatOpenAI, structure: str):
|
| 518 |
messages = [
|
| 519 |
HumanMessage(content=improve_structure_prompt.format(structure=structure)),]
|
|
@@ -523,6 +618,28 @@ def improve_structure(chat: ChatOpenAI, structure: str):
|
|
| 523 |
return response.content
|
| 524 |
|
| 525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
def generate(*data):
|
| 527 |
global visible
|
| 528 |
print("visible", visible)
|
|
@@ -539,13 +656,6 @@ def generate(*data):
|
|
| 539 |
print(f"{excluded_phrases=}")
|
| 540 |
print(f"{included_phrases=}")
|
| 541 |
print(f"{debug=}")
|
| 542 |
-
# print(f"{glossary=}")
|
| 543 |
-
print(f"{glossary_upload=}")
|
| 544 |
-
# print(f"{struct_ref=}")
|
| 545 |
-
|
| 546 |
-
glossary = None
|
| 547 |
-
if glossary_upload is not None:
|
| 548 |
-
glossary = build_glossary(glossary_upload)
|
| 549 |
|
| 550 |
chat = get_model(model, temperature=temperature)
|
| 551 |
|
|
@@ -571,97 +681,116 @@ def generate(*data):
|
|
| 571 |
|
| 572 |
key_features = key_features + ", " + detected_features + "\nIntended uses: " + intended_use
|
| 573 |
|
| 574 |
-
# if glossary:
|
| 575 |
-
# print("Getting terms")
|
| 576 |
-
# terms = glossary.invoke(input=feature + detected_features)
|
| 577 |
-
# for term in terms:
|
| 578 |
-
# print(term)
|
| 579 |
-
|
| 580 |
batch = []
|
| 581 |
min_length = 0
|
| 582 |
max_length = 150
|
|
|
|
| 583 |
for i in range(visible + 1):
|
| 584 |
structure = struct_ref[2 * i]
|
| 585 |
copy = struct_ref[2 * i + 1]
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
print("Using both copy and structure")
|
| 590 |
-
# print("Improving structure")
|
| 591 |
-
# structure = improve_structure(chat=chat, structure=structure)
|
| 592 |
-
messages = [
|
| 593 |
-
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 594 |
-
HumanMessage(content=struct_copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
|
| 595 |
-
|
| 596 |
-
elif len(copy.strip()) > 0:
|
| 597 |
-
print('------------')
|
| 598 |
-
print("Using copy")
|
| 599 |
-
messages = [
|
| 600 |
-
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 601 |
-
HumanMessage(content=copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
|
| 602 |
-
print(messages[1].content)
|
| 603 |
-
print('------------')
|
| 604 |
-
|
| 605 |
-
elif len(structure.strip()) > 0:
|
| 606 |
-
print('------------')
|
| 607 |
-
print("Using structure")
|
| 608 |
-
# print("Improving structure")
|
| 609 |
-
# structure = improve_structure(chat=chat, structure=structure)
|
| 610 |
messages = [
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
|
| 617 |
descriptions = ""
|
| 618 |
-
|
| 619 |
-
response = chat.batch(batch)
|
| 620 |
-
print(response)
|
| 621 |
|
| 622 |
-
parser = JsonOutputParser()
|
| 623 |
-
jresponse = [parser.parse(msg.content) for msg in response]
|
| 624 |
descriptions = []
|
| 625 |
descriptions_post = []
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
|
| 650 |
-
|
| 651 |
-
|
| 652 |
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
# if rewrite != "":
|
| 656 |
-
# descriptions[0] = "Original:\n\n" + descriptions[0] + "\n\nRewritten:\n\n" + rewrite
|
| 657 |
-
# print("\n\nDone rewriting with glossary\n\n")
|
| 658 |
|
| 659 |
-
|
| 660 |
|
| 661 |
-
|
| 662 |
|
| 663 |
-
|
| 664 |
-
result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
|
| 665 |
|
| 666 |
# post_content = post_process(text=md_content, guidance_prompt=guidance_prompt, language=languages, chat=chat)
|
| 667 |
|
|
@@ -721,7 +850,7 @@ with gr.Blocks() as demo:
|
|
| 721 |
garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
|
| 722 |
# language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
|
| 723 |
with gr.Accordion(label="Advanced Options", open=False):
|
| 724 |
-
model = gr.Dropdown(models, value=
|
| 725 |
temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
|
| 726 |
nversions = gr.Slider(minimum=1, maximum=10, value=5, step=int, interactive=True, label="Number of versions", visible=True)
|
| 727 |
excluded_phrases = gr.Textbox(label="Excluded words", interactive=True, lines=2)
|
|
|
|
| 9 |
from langchain.schema import HumanMessage, SystemMessage, AIMessage
|
| 10 |
from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
|
| 11 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 12 |
+
# from langchain_groq import ChatGroq
|
| 13 |
import openai
|
| 14 |
+
import google.generativeai as genai
|
| 15 |
|
| 16 |
from langchain import hub
|
| 17 |
from langchain_chroma import Chroma
|
|
|
|
| 101 |
"Polish",
|
| 102 |
"Portuguese"]
|
| 103 |
|
| 104 |
+
models = ["gpt-4o",
|
|
|
|
|
|
|
| 105 |
"claude-3-7-sonnet-latest",
|
|
|
|
|
|
|
| 106 |
"claude-3-5-sonnet-20240620",
|
| 107 |
"claude-3-5-sonnet-20241022",
|
| 108 |
+
"gemini-2.0-flash-thinking-exp-01-21",
|
|
|
|
| 109 |
]
|
| 110 |
|
| 111 |
+
default_model = "gemini-2.0-flash-thinking-exp-01-21"
|
| 112 |
+
|
| 113 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
| 114 |
|
| 115 |
struct_copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
|
|
| 122 |
The structure of the output should follow the reference structure.
|
| 123 |
Do not use the structure of the reference copy in the output.
|
| 124 |
Do not use any of the excluded words in the output.
|
| 125 |
+
Try to include included words in the output when relevant.
|
| 126 |
+
Use the relevant information from the product features and intended use in the output.
|
| 127 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 128 |
Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
|
| 129 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
|
|
| 163 |
}}
|
| 164 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 165 |
|
| 166 |
+
<product_information>
|
| 167 |
+
<key_features>
|
| 168 |
+
{key_features}
|
| 169 |
+
</key_features>
|
| 170 |
|
| 171 |
+
<reference_structure>
|
| 172 |
+
{structure}
|
| 173 |
+
</reference_structure>
|
| 174 |
|
| 175 |
+
<reference_copy>
|
| 176 |
+
{copy}
|
| 177 |
+
</reference_copy>
|
| 178 |
|
| 179 |
+
<included_phrases>
|
| 180 |
+
{included_phrases}
|
| 181 |
+
</included_phrases>
|
| 182 |
|
| 183 |
+
<excluded_phrases>
|
| 184 |
+
{excluded_phrases}
|
| 185 |
+
</excluded_phrases>
|
| 186 |
+
</product_information>"""
|
| 187 |
|
| 188 |
|
| 189 |
copy_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
|
|
| 194 |
Use markdown format for each output.
|
| 195 |
Make sure that the structure of each output follows the structure of the reference copy.
|
| 196 |
Do not use any of the excluded words in the output.
|
| 197 |
+
Try to include included words in the output when relevant.
|
| 198 |
+
Use the relevant information from the product features and intended use in the output.
|
| 199 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 200 |
Note that the reference copy should be used for style and tone only, do not use any part of the reference copy in the output.
|
| 201 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
|
|
| 234 |
}}
|
| 235 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 236 |
|
| 237 |
+
<product_information>
|
| 238 |
+
<key_features>
|
| 239 |
+
{key_features}
|
| 240 |
+
</key_features>
|
| 241 |
|
| 242 |
+
<reference_copy>
|
| 243 |
+
{copy}
|
| 244 |
+
</reference_copy>
|
| 245 |
|
| 246 |
+
<included_phrases>
|
| 247 |
+
{included_phrases}
|
| 248 |
+
</included_phrases>
|
| 249 |
|
| 250 |
+
<excluded_phrases>
|
| 251 |
+
{excluded_phrases}
|
| 252 |
+
</excluded_phrases>
|
| 253 |
+
</product_information>"""
|
| 254 |
|
| 255 |
|
| 256 |
struct_prompt = """Generate {nversions} versions of the product description for a product with the following information.
|
|
|
|
| 261 |
Do not include any part of the reference structure in the output.
|
| 262 |
Make sure that the structure of each output follows the reference structure.
|
| 263 |
Do not use any of the excluded words in the output.
|
| 264 |
+
Try to include included words in the output when relevant.
|
| 265 |
+
Use the relevant information from the product features and intended use in the output.
|
| 266 |
Do not hallucinate any information about the product, use only the provided key features and intended use to write about the product.
|
| 267 |
Note that the reference structure should be used for structure only, do not use any part of the reference structure in the output.
|
| 268 |
Do not hallucinate information about size and weight. Write about size and weight only if it is available in the list of features.
|
|
|
|
| 301 |
}}
|
| 302 |
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 303 |
|
| 304 |
+
<product_information>
|
| 305 |
+
<key_features>
|
| 306 |
+
{key_features}
|
| 307 |
+
</key_features>
|
| 308 |
+
|
| 309 |
+
<reference_structure>
|
| 310 |
+
{structure}
|
| 311 |
+
</reference_structure>
|
| 312 |
+
|
| 313 |
+
<included_phrases>
|
| 314 |
+
{included_phrases}
|
| 315 |
+
</included_phrases>
|
| 316 |
+
|
| 317 |
+
<excluded_phrases>
|
| 318 |
+
{excluded_phrases}
|
| 319 |
+
</excluded_phrases>
|
| 320 |
+
</product_information>"""
|
| 321 |
+
|
| 322 |
+
evaluation_prompt = """You will be given information of a product and a list of product descriptions.
|
| 323 |
+
Evaluate the quality of the product descriptions based on the following criteria:
|
| 324 |
+
- how faithfully it describes the product features.
|
| 325 |
+
- how well it follows the reference structure.
|
| 326 |
+
- how well it follows the tone of voice, rhythm, cadence and style of the reference copy.
|
| 327 |
+
- how well it avoids the excluded words.
|
| 328 |
+
- how well it includes the included words.
|
| 329 |
+
- how creative the language is.
|
| 330 |
+
Give a score between 0 and 10 for each product description based on the above criteria.
|
| 331 |
+
Return the result in the following JSON format:
|
| 332 |
+
{{
|
| 333 |
+
"versions": [
|
| 334 |
+
{{
|
| 335 |
+
"id": 1,
|
| 336 |
+
"content": The first product description,
|
| 337 |
+
"explanation": A less than 20 word explanation of the score of the first product description,
|
| 338 |
+
"score": The score of the first product description
|
| 339 |
+
}},
|
| 340 |
+
{{
|
| 341 |
+
"id": 2,
|
| 342 |
+
"content": The second product description,
|
| 343 |
+
"explanation": A less than 20 word explanation of the score of the second product description,
|
| 344 |
+
"score": The score of the second product description
|
| 345 |
+
}},
|
| 346 |
+
...
|
| 347 |
+
],
|
| 348 |
+
"best_version": {{
|
| 349 |
+
"explanation": Explanation for why this version is the best,
|
| 350 |
+
"id": The id of the best version
|
| 351 |
+
}}
|
| 352 |
+
}}
|
| 353 |
+
Make sure that the output is in JSON format, no extra text should be included in the output.
|
| 354 |
+
|
| 355 |
+
<product_information>
|
| 356 |
+
<key_features>
|
| 357 |
+
{key_features}
|
| 358 |
+
</key_features>
|
| 359 |
+
|
| 360 |
+
<reference_structure>
|
| 361 |
+
{structure}
|
| 362 |
+
</reference_structure>
|
| 363 |
|
| 364 |
+
<reference_copy>
|
| 365 |
+
{copy}
|
| 366 |
+
</reference_copy>
|
| 367 |
|
| 368 |
+
<included_phrases>
|
| 369 |
+
{included_phrases}
|
| 370 |
+
</included_phrases>
|
| 371 |
|
| 372 |
+
<excluded_phrases>
|
| 373 |
+
{excluded_phrases}
|
| 374 |
+
</excluded_phrases>
|
| 375 |
+
</product_information>
|
| 376 |
+
|
| 377 |
+
<product_descriptions>
|
| 378 |
+
{product_descriptions}
|
| 379 |
+
</product_descriptions>"""
|
| 380 |
|
| 381 |
|
| 382 |
improve_structure_prompt = """You are given a structure for a product description.
|
|
|
|
| 385 |
|
| 386 |
<structure>/n{structure}</structure>"""
|
| 387 |
|
| 388 |
+
gemini_prompt = """You are given information of a product, a reference structure, and a reference copy.
|
| 389 |
+
Please analyze the structure, make a plan on how to follow the structure correctly, and write a product
|
| 390 |
+
description for the product. Use the tone of voice of the reference copy for the generated description.
|
| 391 |
+
Write from {min_length} to {max_length} words.
|
| 392 |
+
Do not hallucinate, do not add information that is not in the product information.
|
| 393 |
+
Try your best to avoid using the excluded words and phrases.
|
| 394 |
+
Try your best to include the included words and phrases.
|
| 395 |
+
Do not enclose the output in html tags, quotes, braces, brackets or anything.
|
| 396 |
+
Return the product description only.
|
| 397 |
+
|
| 398 |
+
<product_information>
|
| 399 |
+
{key_features}
|
| 400 |
+
</product_information>
|
| 401 |
+
|
| 402 |
+
<reference_structure>
|
| 403 |
+
{structure}
|
| 404 |
+
</reference_structure>
|
| 405 |
+
|
| 406 |
+
<reference_copy>
|
| 407 |
+
{copy}
|
| 408 |
+
</reference_copy>
|
| 409 |
+
|
| 410 |
+
<excluded_phrases>
|
| 411 |
+
{excluded_phrases}
|
| 412 |
+
</excluded_phrases>
|
| 413 |
+
|
| 414 |
+
<included_phrases>
|
| 415 |
+
{included_phrases}
|
| 416 |
+
</included_phrases>"""
|
| 417 |
+
|
| 418 |
|
| 419 |
import base64
|
| 420 |
import requests
|
|
|
|
| 551 |
|
| 552 |
def post_process(text: str, guidance_prompt: str, language: str, chat: ChatOpenAI):
|
| 553 |
messages = [
|
| 554 |
+
SystemMessage(content=f"""You are a helpful assistant that edit documents based on the guidelines provided.
|
| 555 |
+
Make sure to write in {language} language."""),
|
| 556 |
HumanMessage(content=f"""Given the following product description, your task is to
|
| 557 |
make minimal modification to the product description such that the resulting description
|
| 558 |
+
follows the rules defined in the guidelines. Make sure to preserve the structure of the
|
| 559 |
original text as much as possible. Do not modify the structure of the original text.
|
| 560 |
Do not change the language of the original text.
|
| 561 |
Output only the modified text in markdown format.
|
|
|
|
| 568 |
{guidance_prompt}""")
|
| 569 |
]
|
| 570 |
|
| 571 |
+
if chat is None:
|
| 572 |
+
chat = ChatAnthropic(model_name="claude-3-7-sonnet-latest",
|
| 573 |
+
anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
|
| 574 |
+
max_tokens_to_sample=4096,
|
| 575 |
+
temperature=0.0)
|
| 576 |
response = chat.invoke(messages, temperature=0.0)
|
| 577 |
text = response.content
|
| 578 |
return text
|
|
|
|
| 582 |
if model_name.startswith("gpt"):
|
| 583 |
chat = ChatOpenAI(model=model_name, max_tokens=4096, temperature=temperature)
|
| 584 |
elif model_name.startswith("claude"):
|
| 585 |
+
chat = ChatAnthropic(model_name=model_name,
|
| 586 |
+
anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
|
| 587 |
+
max_tokens_to_sample=4096,
|
| 588 |
+
temperature=temperature)
|
| 589 |
elif model_name.startswith("gemini"):
|
| 590 |
+
# chat = ChatGoogleGenerativeAI(model=model_name,
|
| 591 |
+
# api_key=os.environ["GOOGLE_API_KEY"])
|
| 592 |
+
chat = genai.GenerativeModel(model_name)
|
| 593 |
else:
|
| 594 |
+
chat = None
|
| 595 |
+
raise ValueError(f"Model {model_name} not supported")
|
| 596 |
return chat
|
| 597 |
|
| 598 |
|
|
|
|
| 609 |
return retriever
|
| 610 |
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
def improve_structure(chat: ChatOpenAI, structure: str):
|
| 613 |
messages = [
|
| 614 |
HumanMessage(content=improve_structure_prompt.format(structure=structure)),]
|
|
|
|
| 618 |
return response.content
|
| 619 |
|
| 620 |
|
| 621 |
+
def evaluate(descriptions,
|
| 622 |
+
reference_structure,
|
| 623 |
+
reference_copy,
|
| 624 |
+
key_features,
|
| 625 |
+
included_phrases,
|
| 626 |
+
excluded_phrases,
|
| 627 |
+
language,
|
| 628 |
+
chat):
|
| 629 |
+
messages = [
|
| 630 |
+
SystemMessage(content=f"""You are a helpful assistant that evaluates product descriptions based on the guidelines provided. Make sure to write in {language} language."""),
|
| 631 |
+
HumanMessage(content=evaluation_prompt.format(key_features=key_features,
|
| 632 |
+
structure=reference_structure,
|
| 633 |
+
copy=reference_copy,
|
| 634 |
+
included_phrases=included_phrases,
|
| 635 |
+
excluded_phrases=excluded_phrases,
|
| 636 |
+
product_descriptions=descriptions)),]
|
| 637 |
+
|
| 638 |
+
response = chat.invoke(messages, temperature=0.0)
|
| 639 |
+
print(response)
|
| 640 |
+
return response
|
| 641 |
+
|
| 642 |
+
|
| 643 |
def generate(*data):
|
| 644 |
global visible
|
| 645 |
print("visible", visible)
|
|
|
|
| 656 |
print(f"{excluded_phrases=}")
|
| 657 |
print(f"{included_phrases=}")
|
| 658 |
print(f"{debug=}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
|
| 660 |
chat = get_model(model, temperature=temperature)
|
| 661 |
|
|
|
|
| 681 |
|
| 682 |
key_features = key_features + ", " + detected_features + "\nIntended uses: " + intended_use
|
| 683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
batch = []
|
| 685 |
min_length = 0
|
| 686 |
max_length = 150
|
| 687 |
+
response = []
|
| 688 |
for i in range(visible + 1):
|
| 689 |
structure = struct_ref[2 * i]
|
| 690 |
copy = struct_ref[2 * i + 1]
|
| 691 |
+
|
| 692 |
+
if model.startswith("gemini"):
|
| 693 |
+
if len((structure + copy).strip()) > 0:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
messages = [
|
| 695 |
+
gemini_prompt.format(min_length=min_length,
|
| 696 |
+
max_length=max_length,
|
| 697 |
+
key_features=key_features,
|
| 698 |
+
structure=structure,
|
| 699 |
+
copy=copy,
|
| 700 |
+
included_phrases=included_phrases,
|
| 701 |
+
excluded_phrases=excluded_phrases)
|
| 702 |
+
]
|
| 703 |
+
|
| 704 |
+
batch.append(messages)
|
| 705 |
+
|
| 706 |
+
ri = chat.generate_content(messages)
|
| 707 |
+
print("Gemini response: ", ri)
|
| 708 |
+
response.append(ri)
|
| 709 |
+
else:
|
| 710 |
+
if len((structure + copy).strip()) > 0:
|
| 711 |
+
if len(copy.strip()) > 0 and len(structure.strip()) > 0:
|
| 712 |
+
print('------------')
|
| 713 |
+
print("Using both copy and structure")
|
| 714 |
+
# print("Improving structure")
|
| 715 |
+
# structure = improve_structure(chat=chat, structure=structure)
|
| 716 |
+
messages = [
|
| 717 |
+
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 718 |
+
HumanMessage(content=struct_copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
|
| 719 |
+
|
| 720 |
+
elif len(copy.strip()) > 0:
|
| 721 |
+
print('------------')
|
| 722 |
+
print("Using copy")
|
| 723 |
+
messages = [
|
| 724 |
+
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 725 |
+
HumanMessage(content=copy_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
|
| 726 |
+
print(messages[1].content)
|
| 727 |
+
print('------------')
|
| 728 |
+
|
| 729 |
+
elif len(structure.strip()) > 0:
|
| 730 |
+
print('------------')
|
| 731 |
+
print("Using structure")
|
| 732 |
+
# print("Improving structure")
|
| 733 |
+
# structure = improve_structure(chat=chat, structure=structure)
|
| 734 |
+
messages = [
|
| 735 |
+
SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. Make sure to write in {languages[i]} language."""),
|
| 736 |
+
HumanMessage(content=struct_prompt.format(nversions=nversions, min_length=min_length, max_length=max_length, key_features=key_features, structure=structure, copy=copy, included_phrases=included_phrases, excluded_phrases=excluded_phrases)),]
|
| 737 |
+
print(messages[1].content)
|
| 738 |
+
print('------------')
|
| 739 |
+
batch.append(messages)
|
| 740 |
+
|
| 741 |
+
response = chat.batch(batch)
|
| 742 |
|
| 743 |
descriptions = ""
|
|
|
|
|
|
|
|
|
|
| 744 |
|
|
|
|
|
|
|
| 745 |
descriptions = []
|
| 746 |
descriptions_post = []
|
| 747 |
+
|
| 748 |
+
if model.startswith("gemini"):
|
| 749 |
+
descriptions = [msg.text for msg in response]
|
| 750 |
+
descriptions_post = [post_process(text=desc,
|
| 751 |
+
guidance_prompt=guidance_prompt,
|
| 752 |
+
language=languages[i],
|
| 753 |
+
chat=None) for i, desc in enumerate(descriptions)]
|
| 754 |
+
|
| 755 |
+
alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
|
| 756 |
+
alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
|
| 757 |
+
result_json = {"outputs": descriptions, "alt_text": alt_text_dict}
|
| 758 |
+
else:
|
| 759 |
+
parser = JsonOutputParser()
|
| 760 |
+
jresponse = [parser.parse(msg.content) for msg in response]
|
| 761 |
+
for i, jr in enumerate(jresponse):
|
| 762 |
+
print(f'{jr=}')
|
| 763 |
+
bestid = jr["best_version"]["id"]
|
| 764 |
+
for d in jr["versions"]:
|
| 765 |
+
if d["id"] == bestid:
|
| 766 |
+
bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
|
| 767 |
+
bests = d["score"]
|
| 768 |
+
break
|
| 769 |
+
|
| 770 |
+
evaluated = evaluate(descriptions=jr["versions"],
|
| 771 |
+
reference_structure=struct_ref[2 * i],
|
| 772 |
+
reference_copy=struct_ref[2 * i + 1],
|
| 773 |
+
key_features=key_features,
|
| 774 |
+
included_phrases=included_phrases,
|
| 775 |
+
excluded_phrases=excluded_phrases,
|
| 776 |
+
language=languages[i], chat=chat)
|
| 777 |
+
print(f'{evaluated=}')
|
| 778 |
+
|
| 779 |
+
bestd_post = post_process(text=bestd,
|
| 780 |
+
guidance_prompt=guidance_prompt,
|
| 781 |
+
language=languages[i], chat=chat)
|
| 782 |
|
| 783 |
+
descriptions.append(bestd)
|
| 784 |
+
descriptions_post.append(bestd_post)
|
| 785 |
|
| 786 |
+
alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
|
| 787 |
+
alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
|
|
|
|
|
|
|
|
|
|
| 788 |
|
| 789 |
+
result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
|
| 790 |
|
| 791 |
+
md_content = "\n\n---\n\n".join(descriptions)
|
| 792 |
|
| 793 |
+
|
|
|
|
| 794 |
|
| 795 |
# post_content = post_process(text=md_content, guidance_prompt=guidance_prompt, language=languages, chat=chat)
|
| 796 |
|
|
|
|
| 850 |
garment_type = gr.Textbox(label="Garment Type", value="all", lines=1, interactive=True)
|
| 851 |
# language = gr.Dropdown(languages, value="American English", interactive=True, label="Language")
|
| 852 |
with gr.Accordion(label="Advanced Options", open=False):
|
| 853 |
+
model = gr.Dropdown(models, value=default_model, interactive=True, label="Model", visible=True)
|
| 854 |
temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
|
| 855 |
nversions = gr.Slider(minimum=1, maximum=10, value=5, step=int, interactive=True, label="Number of versions", visible=True)
|
| 856 |
excluded_phrases = gr.Textbox(label="Excluded words", interactive=True, lines=2)
|
requirements.txt
CHANGED
|
@@ -8,4 +8,5 @@ bcrypt
|
|
| 8 |
langchain_chroma
|
| 9 |
langchainhub
|
| 10 |
langchain_community
|
| 11 |
-
langchain-google-genai
|
|
|
|
|
|
| 8 |
langchain_chroma
|
| 9 |
langchainhub
|
| 10 |
langchain_community
|
| 11 |
+
langchain-google-genai
|
| 12 |
+
google-generativeai
|
str2escaped.py
CHANGED
|
@@ -203,9 +203,40 @@ Return the result in in the following JSON format without any preceding or trail
|
|
| 203 |
"category": the category of the garment, for example 'dress', 'shirt', 'pants', 'shoes', etc
|
| 204 |
}}"""
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
import codecs
|
| 207 |
if __name__ == "__main__":
|
| 208 |
-
print(str2escaped(
|
| 209 |
# print()
|
| 210 |
# print('"write_struct_copy": "' + str2escaped(both).strip() + '",')
|
| 211 |
# print()
|
|
|
|
| 203 |
"category": the category of the garment, for example 'dress', 'shirt', 'pants', 'shoes', etc
|
| 204 |
}}"""
|
| 205 |
|
| 206 |
+
|
| 207 |
+
gemini_prompt = """You are given information of a product, a reference structure, and a reference copy.
|
| 208 |
+
Please analyze the structure, make a plan on how to follow the structure correctly, and write a product
|
| 209 |
+
description for the product. Use the tone of voice of the reference copy for the generated description.
|
| 210 |
+
Write from {min_length} to {max_length} words.
|
| 211 |
+
Do not hallucinate, do not add information that is not in the product information.
|
| 212 |
+
Try your best to avoid using the excluded words and phrases.
|
| 213 |
+
Try your best to include the included words and phrases.
|
| 214 |
+
Do not enclose the output in html tags, quotes, braces, brackets or anything.
|
| 215 |
+
Return the product description only.
|
| 216 |
+
|
| 217 |
+
<product_information>
|
| 218 |
+
{key_features}
|
| 219 |
+
</product_information>
|
| 220 |
+
|
| 221 |
+
<reference_structure>
|
| 222 |
+
{structure}
|
| 223 |
+
</reference_structure>
|
| 224 |
+
|
| 225 |
+
<reference_copy>
|
| 226 |
+
{copy}
|
| 227 |
+
</reference_copy>
|
| 228 |
+
|
| 229 |
+
<excluded_phrases>
|
| 230 |
+
{excluded_phrases}
|
| 231 |
+
</excluded_phrases>
|
| 232 |
+
|
| 233 |
+
<included_phrases>
|
| 234 |
+
{included_phrases}
|
| 235 |
+
</included_phrases>"""
|
| 236 |
+
|
| 237 |
import codecs
|
| 238 |
if __name__ == "__main__":
|
| 239 |
+
print(str2escaped(gemini_prompt))
|
| 240 |
# print()
|
| 241 |
# print('"write_struct_copy": "' + str2escaped(both).strip() + '",')
|
| 242 |
# print()
|