Spaces:

creativeforce
/

Copywriting

Paused

App Files Files Community

tunght committed on Jul 30, 2024

Commit

0d5fa0e

1 Parent(s): 53440c5

add debug info, working on glossary

Browse files

Files changed (2) hide show

app.py +134 -95
glossary.py +16 -0

app.py CHANGED Viewed

@@ -11,6 +11,15 @@ from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
 from langchain_groq import ChatGroq
 import openai
 feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
@@ -215,12 +224,25 @@ def get_model(model_name):
     return chat
 def generate(*data):
     global visible
     print("visible", visible)
     nargs = 9
-    feature, image, garment_type, model, temperature, excluded_words, included_words, glossary, debug = data[:nargs]
     struct_ref = data[nargs:]
     print(f"{feature=}")
@@ -231,9 +253,14 @@ def generate(*data):
     print(f"{excluded_words=}")
     print(f"{included_words=}")
     print(f"{debug=}")
-    print(f"{glossary=}")
     # print(f"{struct_ref=}")
     chat = get_model(model)
     types, struct_languages = parse_structure(struct_ref)
@@ -255,6 +282,12 @@ def generate(*data):
         intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
         print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
     batch = []
     for i in range(visible + 1):
         structure = struct_ref[2 * i]
@@ -288,26 +321,35 @@ Include all included words in the output.
 Do not hallucinate any information.
 Use creative language in each output.
 Rate the quality of each version based on the following criteria:
-    - how well it follows the reference copy's tone of voice, rythm, cadence and style.
     - how well it follows the reference structure.
     - how faithful it describes the product features.
     - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
-[
-    {{
-        "id": 1,
-        "content": The first product description,
-        "score": The score of the first product description,
-        "explanation": A less than 20 word explanation of the score of the first product description
-    }},
-    {{
-        "id": 2,
-        "content": The second product description,
-        "score": The score of the second product description
-        "explanation": A less than 20 word explanation of the score of the second product description
-    }},
-]
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
@@ -333,26 +375,35 @@ Include all included words in the output.
 Do not hallucinate any information.
 Use creative language in each output.
 Rate the quality of each version based on the following criteria:
-    - how well it follows the reference copy's tone of voice, rythm, cadence and style.
-    - how well it follows the reference copy's structure.
     - how faithful it describes the product features.
-    - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
-[
-    {{
-        "id": 1,
-        "content": The first product description,
-        "score": The score of the first product description,
-        "explanation": A less than 20 word explanation of the score of the first product description
-    }},
-    {{
-        "id": 2,
-        "content": The second product description,
-        "score": The score of the second product description
-        "explanation": A less than 20 word explanation of the score of the second product description
-    }},
-]
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
@@ -381,23 +432,32 @@ Rate the quality of each version based on the following criteria:
     - how well it follows the reference tone of voice, rythm, cadence and style.
     - how well it follows the reference structure.
     - how faithful it describes the product features.
     - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
-[
-    {{
-        "id": 1,
-        "content": The first product description,
-        "score": The score of the first product description,
-        "explanation": A less than 20 word explanation of the score of the first product description
-    }},
-    {{
-        "id": 2,
-        "content": The second product description,
-        "score": The score of the second product description
-        "explanation": A less than 20 word explanation of the score of the second product description
-    }},
-]
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
@@ -416,61 +476,39 @@ Excluded words: {excluded_words}"""),]
     response = chat.batch(batch, temperature=temperature)
     print(response)
-    # batch = []
-    # rewrite_map = {}
-    # for i in range(visible + 1):
-    #     structure = struct_ref[2 * i]
-    #     reference = struct_ref[2 * i + 1]
-    #     if len(reference.strip()) > 0:
-    #         messages = [
-    #             SystemMessage(content=f"""You are a helpful assistant that writes about products for ecommerce websites. You write in {languages[i]} language."""),
-    #             HumanMessage(content=f"""Rewrite the following product description in the style and tone of voice
-    #                           of the reference product description.
-    #                          Make sure that the structure and length of the output is similar to the reference product description.
-    #                          Make sure that the output is written in {languages[i]} language.
-    #                          Output the product description in markdown format.
-    #                          Product description to rewirte:
-    #                          ```{response[i].content}```
-    #                          Reference product description:
-    #                          ```{reference}```
-    #                          """)
-    #         ]
-    #         batch.append(messages)
-    #         rewrite_map[i] = len(batch) - 1
-    # print("Rewrite_map", rewrite_map)
-    # print("Rewriting")
-    # re_response = chat.batch(batch, temperature=temperature)
-    # for i in range(len(re_response)):
-    #     print(f"Original: {response[i].content}")
-    #     print(f"Rewritten: {re_response[i].content}")
-    # response = [re_response[rewrite_map[i]] if i in rewrite_map else response[i] for i in range(visible + 1)]
-    # print("Done rewriting")
     parser = JsonOutputParser()
     jresponse = [parser.parse(msg.content) for msg in response]
     descriptions = []
     for jr in jresponse:
-        bests = 0
-        bestd = ""
-        for d in jr:
-            print(f'{d["score"]=}, {d["id"]=}, {bests=}')
-            if d["score"] > bests:
-                bests = d["score"]
-                bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
-            elif d["score"] == bests and random.random() > 0.5:
-                bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
         descriptions.append(bestd)
-    # description = "\n\n\n\n".join([msg.content for msg in response])
-    md_content = "\n\n\n".join(descriptions)
     alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
     alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
-    result_json = {"outputs": descriptions, "alt_text": alt_text_dict}
     result_md = md_content + alt_texts_str + '\n'.join([f'![Product photo](data:image/png;base64,{base64_image} "{alt_text}")' if base64_image != "" else "" for (base64_image, alt_text) in zip(base64_images, alt_texts)])
     return result_md, result_json
@@ -526,7 +564,8 @@ with gr.Blocks() as demo:
                 temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
                 excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                 included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
-                glossary = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2,"static"), headers=["Description", "Way of writing"], label="Glossary", interactive=True)
                 debug = gr.Checkbox(label="Debug", interactive=True, value=True)
             with gr.Row():
                 submit = gr.Button(value="Submit")
@@ -548,7 +587,7 @@ with gr.Blocks() as demo:
             md_output = gr.Markdown(label="Output", show_label=True)
             json_output = gr.JSON(label="JSON Output")
         submit.click(generate, inputs=[feature, image, garment_type, model, temperature,
-                                       excluded_words, included_words, glossary, debug, *struct_ref],
                      outputs=[md_output, json_output])
         # advanced.click(show_advanced, inputs=[], outputs=[model, temperature])

 from langchain_groq import ChatGroq
 import openai
+from langchain import hub
+from langchain_chroma import Chroma
+from langchain_community.document_loaders import WebBaseLoader, CSVLoader
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_openai import OpenAIEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_core.vectorstores import VectorStoreRetriever
 feature_text = "Brand: Duckly. \nProduct name: Duck runner pro. \nKey properties: t-shirt, for running, sweat wicking, for marathon, 100% cotton."
     return chat
+def build_glossary(glossary_file, fieldnames=None) -> VectorStoreRetriever:
+    """Build a retriever over the rows of a glossary CSV file.
+
+    The CSV is loaded with ``CSVLoader``, the resulting documents are
+    embedded with ``OpenAIEmbeddings`` and indexed in a ``Chroma`` vector
+    store, and that store is exposed as a retriever.
+
+    Args:
+        glossary_file: Path of the comma-delimited, double-quote-quoted CSV
+            file to load.
+        fieldnames: Optional column names forwarded to the underlying CSV
+            reader. When omitted, no ``fieldnames`` entry is passed and the
+            reader falls back to its default handling of the header row.
+            NOTE(review): when names are supplied, the first row of the file
+            is presumably treated as data rather than a header -- confirm
+            against the glossary files in use.
+
+    Returns:
+        A retriever backed by the Chroma store built from the CSV documents.
+    """
+    csv_args = {"delimiter": ",", "quotechar": '"'}
+    # Forward fieldnames only when the caller supplied them, so the default
+    # call path behaves exactly as before.
+    if fieldnames is not None:
+        csv_args["fieldnames"] = fieldnames
+    loader = CSVLoader(file_path=glossary_file, csv_args=csv_args)
+    docs = loader.load()
+    vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
+    return vectorstore.as_retriever()
 def generate(*data):
     global visible
     print("visible", visible)
     nargs = 9
+    feature, image, garment_type, model, temperature, excluded_words, included_words, glossary_upload, debug = data[:nargs]
     struct_ref = data[nargs:]
     print(f"{feature=}")
     print(f"{excluded_words=}")
     print(f"{included_words=}")
     print(f"{debug=}")
+    # print(f"{glossary=}")
+    print(f"{glossary_upload=}")
     # print(f"{struct_ref=}")
+    glossary = None
+    if glossary_upload is not None:
+        glossary = build_glossary(glossary_upload)
     chat = get_model(model)
     types, struct_languages = parse_structure(struct_ref)
         intended_use = "Intended use: " + ", ".join(image_features["intended_use"])
         print(f"Detected features: {detected_features}, Intended use: {intended_use}, Alt text: {alt_texts}")
+    if glossary:
+        print("Getting terms")
+        terms = glossary.invoke(input=feature + detected_features)
+        for term in terms:
+            print(term)
     batch = []
     for i in range(visible + 1):
         structure = struct_ref[2 * i]
 Do not hallucinate any information.
 Use creative language in each output.
 Rate the quality of each version based on the following criteria:
+    - how well it follows the reference tone of voice, rythm, cadence and style.
     - how well it follows the reference structure.
     - how faithful it describes the product features.
+    - how well it avoid the excluded words.
+    - how well it includes the included words.
     - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
+{{
+    "versions": [
+        {{
+            "id": 1,
+            "content": The first product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the first product description
+        }},
+        {{
+            "id": 2,
+            "content": The second product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the second product description
+        }},
+        ...
+    ],
+    "best_version": {{
+        "id": The id of the best version,
+        "explanation": Explanation for why this version is the best
+    }}
+}}
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
 Do not hallucinate any information.
 Use creative language in each output.
 Rate the quality of each version based on the following criteria:
+    - how well it follows the reference tone of voice, rythm, cadence and style.
+    - how well it follows the reference structure.
     - how faithful it describes the product features.
+    - how well it avoid the excluded words.
+    - how well it includes the included words.
+    - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
+{{
+    "versions": [
+        {{
+            "id": 1,
+            "content": The first product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the first product description
+        }},
+        {{
+            "id": 2,
+            "content": The second product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the second product description
+        }},
+        ...
+    ],
+    "best_version": {{
+        "id": The id of the best version,
+        "explanation": Explanation for why this version is the best
+    }}
+}}
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
     - how well it follows the reference tone of voice, rythm, cadence and style.
     - how well it follows the reference structure.
     - how faithful it describes the product features.
+    - how well it avoid the excluded words.
+    - how well it includes the included words.
     - how creative the language is.
 The score should be a number between 0 and 10 with 10 being the best quality.
 Return the result in the following JSON format:
+{{
+    "versions": [
+        {{
+            "id": 1,
+            "content": The first product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the first product description
+        }},
+        {{
+            "id": 2,
+            "content": The second product description,
+            "explanation": A less than 20 word explanation of the score of the first product description,
+            "score": The score of the second product description
+        }},
+        ...
+    ],
+    "best_version": {{
+        "id": The id of the best version,
+        "explanation": Explanation for why this version is the best
+    }}
+}}
 Make sure that the output is in JSON format, no extra text should be included in the output.
 Product information:
     response = chat.batch(batch, temperature=temperature)
     print(response)
     parser = JsonOutputParser()
     jresponse = [parser.parse(msg.content) for msg in response]
     descriptions = []
     for jr in jresponse:
+        print(f'{jr=}')
+        bestid = jr["best_version"]["id"]
+        for d in jr["versions"]:
+            if d["id"] == bestid:
+                bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {jr['best_version']['explanation']}" if debug else "")
+                bests  = d["score"]
+                break
+        # bests = 0
+        # bestd = ""
+        # for d in jr:
+        #     print(f'{d["score"]=}, {d["id"]=}, {bests=}')
+        #     if d["score"] > bests:
+        #         bests = d["score"]
+        #         bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
+        #     elif d["score"] == bests and random.random() > 0.5:
+        #         bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
+        #     if d["id"] == bestid:
+        #         bests = d["score"]
+        #         bestd = d["content"] + (f"\n\nDebug info:\n\nScore: {d['score']}\n\nExplanation: {d['explanation']}" if debug else "")
+        #         break
         descriptions.append(bestd)
+    md_content = "\n\n---\n\n".join(descriptions)
     alt_texts_str = '\n\n### Alt text\n\n' + '\n- ' + '\n- '.join(alt_texts) if len(alt_texts) > 0 else ""
     alt_text_dict = {k[0]: v for (k, v) in zip(image, alt_texts)} if len(alt_texts) > 0 else {}
+    result_json = {"outputs": jresponse if debug else descriptions, "alt_text": alt_text_dict}
     result_md = md_content + alt_texts_str + '\n'.join([f'![Product photo](data:image/png;base64,{base64_image} "{alt_text}")' if base64_image != "" else "" for (base64_image, alt_text) in zip(base64_images, alt_texts)])
     return result_md, result_json
                 temperature = gr.Slider(minimum=0., maximum=1.0, value=0., interactive=True, label="Temperature", visible=True)
                 excluded_words = gr.Textbox(label="Excluded words", interactive=True, lines=2)
                 included_words = gr.Textbox(label="Included words", interactive=True, lines=2)
+                # glossary = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2,"static"), headers=["Description", "Way of writing"], label="Glossary", interactive=True)
+                glossary_upload = gr.UploadButton(label="Upload Glossary", interactive=True, file_types=["csv"])
                 debug = gr.Checkbox(label="Debug", interactive=True, value=True)
             with gr.Row():
                 submit = gr.Button(value="Submit")
             md_output = gr.Markdown(label="Output", show_label=True)
             json_output = gr.JSON(label="JSON Output")
         submit.click(generate, inputs=[feature, image, garment_type, model, temperature,
+                                       excluded_words, included_words, glossary_upload, debug, *struct_ref],
                      outputs=[md_output, json_output])
         # advanced.click(show_advanced, inputs=[], outputs=[model, temperature])

glossary.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import random
+import traceback
+import gradio as gr
+import numpy as np
+import os
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_openai.chat_models import ChatOpenAI
+from langchain.schema import HumanMessage, SystemMessage, AIMessage
+from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages
+from langchain_groq import ChatGroq
+import openai
+def glossary_rag():
+    """Placeholder for glossary retrieval logic; currently does nothing."""
+    pass