Spaces:

abhiimanyu
/

LearningContent

Sleeping

App Files Community

abhiimanyu committed on Nov 18, 2024

Commit

3668272

verified ·

1 Parent(s): 3f1d78e

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -34,30 +34,29 @@ def format_prompt(topic, description, difficulty):
 def clean_and_format_output(output):
     """
     Cleans, validates, and attempts to auto-repair JSON output from the AI model.
     """
     try:
         # Step 1: Clean the raw output
         cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output)  # Remove non-ASCII characters
-        cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output)  # Remove extraneous symbols
-        cleaned_output = re.sub(r'^[^{]*', '', cleaned_output)  # Remove text before first '{'
-        cleaned_output = re.sub(r'[^}]*$', '', cleaned_output)  # Remove text after last '}'
-        cleaned_output = re.sub(r'\\_', '_', cleaned_output)  # Replace improperly escaped underscores
         cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip()  # Normalize whitespace
-        cleaned_output = cleaned_output.replace('\\"', '"')  # Fix improperly escaped quotes
         cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)  # Remove trailing commas
         # Step 2: Attempt to parse the cleaned output
         try:
             json_output = json.loads(cleaned_output)
         except json.JSONDecodeError as e:
-            # Attempt to auto-repair truncated JSON
-            if "Expecting ',' delimiter" in str(e):
-                cleaned_output += "]}"  # Add missing array and object closures
                 json_output = json.loads(cleaned_output)  # Retry parsing
             else:
                 raise e
-        # Step 3: Validate required structure
         if "title" not in json_output or "sections" not in json_output:
             raise ValueError("Missing required keys: 'title' or 'sections'.")
         if not isinstance(json_output["sections"], list):
@@ -81,6 +80,7 @@ def clean_and_format_output(output):
             "output": cleaned_output
         }
 # Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     """

 def clean_and_format_output(output):
     """
     Cleans, validates, and attempts to auto-repair JSON output from the AI model.
+    Handles truncated and malformed JSON gracefully.
     """
     try:
         # Step 1: Clean the raw output
         cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output)  # Remove non-ASCII characters
+        cleaned_output = re.sub(r'`|<s>|</s>|◀|▶', '', cleaned_output)  # Remove extraneous symbols
+        cleaned_output = re.sub(r'^[^{]*', '', cleaned_output)  # Remove text before the first '{'
+        cleaned_output = re.sub(r'[^}]*$', '', cleaned_output)  # Remove text after the last '}'
         cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip()  # Normalize whitespace
         cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)  # Remove trailing commas
         # Step 2: Attempt to parse the cleaned output
         try:
             json_output = json.loads(cleaned_output)
         except json.JSONDecodeError as e:
+            # Attempt to auto-repair if truncation is detected
+            if "Expecting" in str(e) or cleaned_output[-1] != "}":
+                cleaned_output += "}"  # Close the JSON object
                 json_output = json.loads(cleaned_output)  # Retry parsing
             else:
                 raise e
+        # Step 3: Validate the required structure
         if "title" not in json_output or "sections" not in json_output:
             raise ValueError("Missing required keys: 'title' or 'sections'.")
         if not isinstance(json_output["sections"], list):
             "output": cleaned_output
         }
 # Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     """