Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -46,22 +46,12 @@ def clean_and_format_learning_content(output):
|
|
| 46 |
cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
|
| 47 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 48 |
|
| 49 |
-
# Step 2: Fix
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
# Replace the invalid opening brace with an array opening
|
| 54 |
-
cleaned_output = re.sub(sections_pattern, '"sections": [ {', cleaned_output)
|
| 55 |
-
# Add a closing array bracket at the end of 'sections'
|
| 56 |
-
cleaned_output = re.sub(r'(}\s*,?\s*)]}', r'} ] }', cleaned_output)
|
| 57 |
-
|
| 58 |
-
# Step 3: Remove invalid syntax in 'content' fields
|
| 59 |
-
cleaned_output = re.sub(r'\[.*?\]\(.*?\)', '', cleaned_output) # Remove Markdown-like links
|
| 60 |
-
|
| 61 |
-
# Step 4: Fix missing commas between JSON objects
|
| 62 |
-
cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
open_braces = cleaned_output.count('{')
|
| 66 |
close_braces = cleaned_output.count('}')
|
| 67 |
open_brackets = cleaned_output.count('[')
|
|
@@ -71,18 +61,23 @@ def clean_and_format_learning_content(output):
|
|
| 71 |
if open_brackets > close_brackets:
|
| 72 |
cleaned_output += ']' * (open_brackets - close_brackets)
|
| 73 |
|
| 74 |
-
#
|
|
|
|
|
|
|
|
|
|
| 75 |
json_output = json.loads(cleaned_output)
|
| 76 |
|
| 77 |
-
# Step
|
| 78 |
required_keys = ["title", "sections"]
|
| 79 |
if "title" not in json_output or "sections" not in json_output:
|
| 80 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
| 81 |
if not isinstance(json_output["sections"], list):
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
| 86 |
|
| 87 |
return json_output
|
| 88 |
|
|
@@ -94,6 +89,7 @@ def clean_and_format_learning_content(output):
|
|
| 94 |
"output": cleaned_output
|
| 95 |
}
|
| 96 |
|
|
|
|
| 97 |
# Function to generate learning content
|
| 98 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 99 |
"""
|
|
|
|
| 46 |
cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
|
| 47 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 48 |
|
| 49 |
+
# Step 2: Fix invalid 'sections' fields
|
| 50 |
+
# Replace a "sections" key that has no value (e.g., "sections": ,) with an empty array
|
| 51 |
+
if re.search(r'"sections":\s*,', cleaned_output):
|
| 52 |
+
cleaned_output = re.sub(r'"sections":\s*,', '"sections": []', cleaned_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
# Fix unbalanced brackets or braces
|
| 55 |
open_braces = cleaned_output.count('{')
|
| 56 |
close_braces = cleaned_output.count('}')
|
| 57 |
open_brackets = cleaned_output.count('[')
|
|
|
|
| 61 |
if open_brackets > close_brackets:
|
| 62 |
cleaned_output += ']' * (open_brackets - close_brackets)
|
| 63 |
|
| 64 |
+
# Fix commas between objects in arrays
|
| 65 |
+
cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
|
| 66 |
+
|
| 67 |
+
# Step 3: Attempt to parse JSON
|
| 68 |
json_output = json.loads(cleaned_output)
|
| 69 |
|
| 70 |
+
# Step 4: Validate JSON structure
|
| 71 |
required_keys = ["title", "sections"]
|
| 72 |
if "title" not in json_output or "sections" not in json_output:
|
| 73 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
| 74 |
if not isinstance(json_output["sections"], list):
|
| 75 |
+
# If 'sections' is not a list, replace it with an empty list
|
| 76 |
+
json_output["sections"] = []
|
| 77 |
+
else:
|
| 78 |
+
for section in json_output["sections"]:
|
| 79 |
+
if "subheading" not in section or "content" not in section:
|
| 80 |
+
raise ValueError("Each section must contain 'subheading' and 'content'.")
|
| 81 |
|
| 82 |
return json_output
|
| 83 |
|
|
|
|
| 89 |
"output": cleaned_output
|
| 90 |
}
|
| 91 |
|
| 92 |
+
|
| 93 |
# Function to generate learning content
|
| 94 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 95 |
"""
|