Spaces:

WordLift
/

sd-validator

Running

App Files Community

cyberandy committed on Dec 13, 2024

Commit

73367c1

verified ·

1 Parent(s): 96a1157

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -47

app.py CHANGED Viewed

@@ -102,12 +102,19 @@ else:
         """
         try:
             # Create the system message for ChatGPT
-            prefix_messages = [{
                 "role": "system",
-                "content": '''You are a helpful and truthful SEO that is very good at analyzing websites with a specific focus on structured data.
-                            You are able to provide a detailed report on the website's structured data and how to improve it.
-                            ADD AS LEARN MORE LINKS FOR THE FIRST TEXT BLOCK LINKS TO structured data https://wordlift.io/blog/en/entity/structured-data/ and schema.org https://wordlift.io/blog/en/entity/schema-org/ TO PROVIDE ADDITIONAL HELP.
-                            YOU ARE WRITING THE REPORT IN HTML USING A TEMPLATE.'''
             }]
             # Initialize OpenAI client with basic configuration
@@ -125,19 +132,17 @@ else:
             if not _issues and len(_items) > 0:
                 # Case 1: When there are NO issues but there ARE items
                 template = """
-                First text block of the report.
-                Analyze the: {advice}, consider that the site features the following schema classes: {items}.
-                Second text block of the report.
-                The website's homepage also references the following entities: {topics} that could be used to improve the SEO of the website further.
-                Third text block of the report.
-                Describe, if available, IN A SINGLE SENTENCE the {technologies} that the site appears to be using and what they do.
-                THE OUTPUT MUST USE THE FOLLOWING TEMPLATE:
-                "first": "First text block with schema classes in <i>italic</i>",
-                "second": "Second text block with entities in <b>bold</b>",
-                "third": "Third text block with technologies in <i>italic</i>"
                 """
                 prompt = PromptTemplate(
                     template=template,
@@ -153,19 +158,16 @@ else:
             elif not _items:
                 # Case 2: When there are NO schema classes
                 template = """
-                First text block of the report.
-                The website homepage doesn't seem to feature any schema class.
-                Second text block of the report.
-                The website's homepage also references the following entities: {topics} that can be used to improve the SEO of the website.
-                Third text block of the report.
-                Describe, if available, IN A SINGLE SENTENCE the {technologies} that the site appears to be using and what they do.
-                THE OUTPUT MUST USE THE FOLLOWING TEMPLATE:
-                "first": "First text block",
-                "second": "Second text block with entities in <b>bold</b>",
-                "third": "Third text block with technologies in <i>italic</i>"
                 """
                 prompt = PromptTemplate(
                     template=template,
@@ -179,23 +181,19 @@ else:
             else:
                 # Case 3: When there ARE issues
                 template = """
-                First text block of the report.
-                Analyze the: {advice}, consider that the site features the following schema classes: {items}.
-                Second text block of the report.
-                Describe the following issues with the markup: {issues} and indicate how to fix them.
-                Third text block of the report.
-                The website's homepage also references the following entities: {topics} that could be used to improve the SEO of the website further.
-                Fourth text block of the report.
-                Describe, if available, IN A SINGLE SENTENCE the {technologies} that the site appears to be using and what they do.
-                THE OUTPUT MUST USE THE FOLLOWING TEMPLATE:
-                "first": "First text block with schema classes in <i>italic</i>",
-                "second": "Second text block with issues in <u>underline</u>",
-                "third": "Third text block with entities in <b>bold</b>",
-                "fourth": "Fourth text block with technologies in <i>italic</i>"
                 """
                 prompt = PromptTemplate(
                     template=template,
@@ -218,8 +216,7 @@ else:
                 response = client.chat.completions.create(
                     model="gpt-4o",
                     messages=messages,
-                    temperature=0.7,
-                    max_tokens=1500
                 )
                 if hasattr(response.choices[0].message, 'content'):
                     out = response.choices[0].message.content
@@ -240,6 +237,11 @@ else:
             error_message = f"An unexpected error occurred: {str(e)}"
             print(error_message)  # Log the error for debugging
             return error_message
     # Call WooRank API to get the data (cached)
     @st.cache_data

         """
         try:
             # Create the system message for ChatGPT
+                        prefix_messages = [{
                 "role": "system",
+                "content": '''You are an SEO expert specializing in structured data analysis. Your task is to create JSON-formatted reports about websites' structured data.
+    Key requirements:
+    1. Always format output as a valid JSON object
+    2. Use the exact structure provided in the template
+    3. Include HTML formatting (<i>, <b>, <u>) as specified
+    4. Add relevant links to structured data (https://wordlift.io/blog/en/entity/structured-data/) and schema.org (https://wordlift.io/blog/en/entity/schema-org/) in the first section
+    5. Keep responses concise but informative
+    6. Ensure proper JSON escaping for quotes and special characters
+    Remember: The output must be a single, valid JSON object that can be parsed without additional processing.'''
             }]
             # Initialize OpenAI client with basic configuration
             if not _issues and len(_items) > 0:
                 # Case 1: When there are NO issues but there ARE items
                 template = """
+                Based on the following data, create a JSON object with three parts:
+                1. {advice} and schema classes: {items}
+                2. Entities found: {topics}
+                3. Technologies: {technologies}
+                Structure your response as a valid JSON object with this exact format:
+                {{
+                    "first": "Analysis of schema classes with classes marked in <i>italic</i>",
+                    "second": "Description of entities marked in <b>bold</b>",
+                    "third": "Description of technologies in <i>italic</i>"
+                }}
                 """
                 prompt = PromptTemplate(
                     template=template,
             elif not _items:
                 # Case 2: When there are NO schema classes
                 template = """
+                Create a JSON object for a website with no schema classes, based on:
+                1. Entities found: {topics}
+                2. Technologies: {technologies}
+                Structure your response as a valid JSON object with this exact format:
+                {{
+                    "first": "Notice about missing schema classes",
+                    "second": "Description of entities marked in <b>bold</b>",
+                    "third": "Description of technologies in <i>italic</i>"
+                }}
                 """
                 prompt = PromptTemplate(
                     template=template,
             else:
                 # Case 3: When there ARE issues
                 template = """
+                Create a JSON object based on the following data:
+                1. {advice} and schema classes: {items}
+                2. Markup issues: {issues}
+                3. Entities found: {topics}
+                4. Technologies: {technologies}
+                Structure your response as a valid JSON object with this exact format:
+                {{
+                    "first": "Analysis of schema classes with classes marked in <i>italic</i>",
+                    "second": "Description of issues marked in <u>underline</u>",
+                    "third": "Description of entities marked in <b>bold</b>",
+                    "fourth": "Description of technologies in <i>italic</i>"
+                }}
                 """
                 prompt = PromptTemplate(
                     template=template,
                 response = client.chat.completions.create(
                     model="gpt-4o",
                     messages=messages,
+                    temperature=0.2,
                 )
                 if hasattr(response.choices[0].message, 'content'):
                     out = response.choices[0].message.content
             error_message = f"An unexpected error occurred: {str(e)}"
             print(error_message)  # Log the error for debugging
             return error_message
+        except Exception as e:
+            error_message = f"An unexpected error occurred: {str(e)}"
+            print(error_message)  # Log the error for debugging
+            return error_message
     # Call WooRank API to get the data (cached)
     @st.cache_data