Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,10 +34,10 @@ def format_prompt(topic, description, difficulty):
|
|
| 34 |
# Function to clean and format the AI output
|
| 35 |
def clean_and_format_learning_content(output):
|
| 36 |
"""
|
| 37 |
-
Cleans, validates, and
|
| 38 |
"""
|
| 39 |
try:
|
| 40 |
-
# Step 1: Clean
|
| 41 |
cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
|
| 42 |
cleaned_output = re.sub(r'`|<s>|</s>|◀|▶', '', cleaned_output) # Remove extraneous symbols
|
| 43 |
cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before the first '{'
|
|
@@ -46,14 +46,17 @@ def clean_and_format_learning_content(output):
|
|
| 46 |
cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
|
| 47 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 48 |
|
| 49 |
-
# Step 2: Fix
|
| 50 |
-
|
| 51 |
-
cleaned_output = re.sub(r'\]\]', ']', cleaned_output)
|
| 52 |
|
| 53 |
-
# Fix
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
|
| 55 |
|
| 56 |
-
#
|
| 57 |
open_braces = cleaned_output.count('{')
|
| 58 |
close_braces = cleaned_output.count('}')
|
| 59 |
open_brackets = cleaned_output.count('[')
|
|
@@ -63,10 +66,10 @@ def clean_and_format_learning_content(output):
|
|
| 63 |
if open_brackets > close_brackets:
|
| 64 |
cleaned_output += ']' * (open_brackets - close_brackets)
|
| 65 |
|
| 66 |
-
# Step
|
| 67 |
-
json_output = json.loads(cleaned_output)
|
| 68 |
|
| 69 |
-
# Step
|
| 70 |
required_keys = ["title", "sections"]
|
| 71 |
if "title" not in json_output or "sections" not in json_output:
|
| 72 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
|
@@ -86,15 +89,6 @@ def clean_and_format_learning_content(output):
|
|
| 86 |
"output": cleaned_output
|
| 87 |
}
|
| 88 |
|
| 89 |
-
except (json.JSONDecodeError, ValueError) as e:
|
| 90 |
-
# Provide detailed error information for debugging
|
| 91 |
-
return {
|
| 92 |
-
"error": "Failed to parse or validate output as JSON",
|
| 93 |
-
"details": str(e),
|
| 94 |
-
"output": cleaned_output
|
| 95 |
-
}
|
| 96 |
-
|
| 97 |
-
|
| 98 |
# Function to generate learning content
|
| 99 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 100 |
"""
|
|
|
|
| 34 |
# Function to clean and format the AI output
|
| 35 |
def clean_and_format_learning_content(output):
|
| 36 |
"""
|
| 37 |
+
Cleans, validates, and repairs JSON output for learning content.
|
| 38 |
"""
|
| 39 |
try:
|
| 40 |
+
# Step 1: Clean raw output
|
| 41 |
cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
|
| 42 |
cleaned_output = re.sub(r'`|<s>|</s>|◀|▶', '', cleaned_output) # Remove extraneous symbols
|
| 43 |
cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before the first '{'
|
|
|
|
| 46 |
cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
|
| 47 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 48 |
|
| 49 |
+
# Step 2: Strip Markdown-style links, keeping only the link text ([text](url) -> text)
|
| 50 |
+
cleaned_output = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', cleaned_output)
|
|
|
|
| 51 |
|
| 52 |
+
# Step 3: Collapse doubled closers (']]' -> ']', '}}' -> '}'); NOTE: this also rewrites validly nested closers
|
| 53 |
+
cleaned_output = re.sub(r'\]\]', ']', cleaned_output) # Remove extra closing brackets
|
| 54 |
+
cleaned_output = re.sub(r'\}\}', '}', cleaned_output) # Remove extra closing braces
|
| 55 |
+
|
| 56 |
+
# Step 4: Fix missing commas between JSON objects
|
| 57 |
cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
|
| 58 |
|
| 59 |
+
# Step 5: Balance brackets and braces
|
| 60 |
open_braces = cleaned_output.count('{')
|
| 61 |
close_braces = cleaned_output.count('}')
|
| 62 |
open_brackets = cleaned_output.count('[')
|
|
|
|
| 66 |
if open_brackets > close_brackets:
|
| 67 |
cleaned_output += ']' * (open_brackets - close_brackets)
|
| 68 |
|
| 69 |
+
# Step 6: Attempt to parse JSON
|
| 70 |
+
json_output = json.loads(cleaned_output)
|
| 71 |
|
| 72 |
+
# Step 7: Validate JSON structure
|
| 73 |
required_keys = ["title", "sections"]
|
| 74 |
if "title" not in json_output or "sections" not in json_output:
|
| 75 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
|
|
|
| 89 |
"output": cleaned_output
|
| 90 |
}
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# Function to generate learning content
|
| 93 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 94 |
"""
|