Spaces:

abhiimanyu
/

LearningContent

Sleeping

App Files Files Community

abhiimanyu committed on Dec 11, 2024

Commit

c2cc992

verified ·

1 Parent(s): f154ffe

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -21

app.py CHANGED Viewed

@@ -46,22 +46,12 @@ def clean_and_format_learning_content(output):
         cleaned_output = cleaned_output.replace('\\"', '"')  # Fix improperly escaped quotes
         cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)  # Remove trailing commas
-        # Step 2: Fix the 'sections' structure
-        # Ensure 'sections' is an array, not a single object
-        sections_pattern = r'"sections":\s*{'
-        if re.search(sections_pattern, cleaned_output):
-            # Replace the invalid opening brace with an array opening
-            cleaned_output = re.sub(sections_pattern, '"sections": [ {', cleaned_output)
-            # Add a closing array bracket at the end of 'sections'
-            cleaned_output = re.sub(r'(}\s*,?\s*)]}', r'} ] }', cleaned_output)
-        # Step 3: Remove invalid syntax in 'content' fields
-        cleaned_output = re.sub(r'\[.*?\]\(.*?\)', '', cleaned_output)  # Remove Markdown-like links
-        # Step 4: Fix missing commas between JSON objects
-        cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
-        # Step 5: Fix unbalanced brackets and braces
         open_braces = cleaned_output.count('{')
         close_braces = cleaned_output.count('}')
         open_brackets = cleaned_output.count('[')
@@ -71,18 +61,23 @@ def clean_and_format_learning_content(output):
         if open_brackets > close_brackets:
             cleaned_output += ']' * (open_brackets - close_brackets)
-        # Step 6: Attempt to parse JSON
         json_output = json.loads(cleaned_output)
-        # Step 7: Validate JSON structure
         required_keys = ["title", "sections"]
         if "title" not in json_output or "sections" not in json_output:
             raise ValueError("Missing required keys: 'title' or 'sections'.")
         if not isinstance(json_output["sections"], list):
-            raise ValueError("'sections' must be a list.")
-        for section in json_output["sections"]:
-            if "subheading" not in section or "content" not in section:
-                raise ValueError("Each section must contain 'subheading' and 'content'.")
         return json_output
@@ -94,6 +89,7 @@ def clean_and_format_learning_content(output):
             "output": cleaned_output
         }
 # Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     """

         cleaned_output = cleaned_output.replace('\\"', '"')  # Fix improperly escaped quotes
         cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)  # Remove trailing commas
+        # Step 2: Fix invalid 'sections' fields
+        # Replace a 'sections' key that has no value (e.g. `"sections": ,`) with an empty array
+        if re.search(r'"sections":\s*,', cleaned_output):
+            cleaned_output = re.sub(r'"sections":\s*,', '"sections": []', cleaned_output)
+        # Fix unbalanced brackets or braces
         open_braces = cleaned_output.count('{')
         close_braces = cleaned_output.count('}')
         open_brackets = cleaned_output.count('[')
         if open_brackets > close_brackets:
             cleaned_output += ']' * (open_brackets - close_brackets)
+        # Insert missing commas between adjacent objects (a '}' followed by a '{')
+        cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
+        # Step 3: Attempt to parse JSON
         json_output = json.loads(cleaned_output)
+        # Step 4: Validate JSON structure
         required_keys = ["title", "sections"]
         if "title" not in json_output or "sections" not in json_output:
             raise ValueError("Missing required keys: 'title' or 'sections'.")
         if not isinstance(json_output["sections"], list):
+            # If 'sections' is not a list, replace it with an empty list
+            json_output["sections"] = []
+        else:
+            for section in json_output["sections"]:
+                if "subheading" not in section or "content" not in section:
+                    raise ValueError("Each section must contain 'subheading' and 'content'.")
         return json_output
             "output": cleaned_output
         }
 # Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     """