Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,30 +34,29 @@ def format_prompt(topic, description, difficulty):
|
|
| 34 |
def clean_and_format_output(output):
|
| 35 |
"""
|
| 36 |
Cleans, validates, and attempts to auto-repair JSON output from the AI model.
|
|
|
|
| 37 |
"""
|
| 38 |
try:
|
| 39 |
# Step 1: Clean the raw output
|
| 40 |
cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
|
| 41 |
-
cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output) # Remove extraneous symbols
|
| 42 |
-
cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before first '{'
|
| 43 |
-
cleaned_output = re.sub(r'[^}]*$', '', cleaned_output) # Remove text after last '}'
|
| 44 |
-
cleaned_output = re.sub(r'\\_', '_', cleaned_output) # Replace improperly escaped underscores
|
| 45 |
cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip() # Normalize whitespace
|
| 46 |
-
cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
|
| 47 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 48 |
|
| 49 |
# Step 2: Attempt to parse the cleaned output
|
| 50 |
try:
|
| 51 |
json_output = json.loads(cleaned_output)
|
| 52 |
except json.JSONDecodeError as e:
|
| 53 |
-
# Attempt to auto-repair
|
| 54 |
-
if "Expecting
|
| 55 |
-
cleaned_output += "
|
| 56 |
json_output = json.loads(cleaned_output) # Retry parsing
|
| 57 |
else:
|
| 58 |
raise e
|
| 59 |
|
| 60 |
-
# Step 3: Validate required structure
|
| 61 |
if "title" not in json_output or "sections" not in json_output:
|
| 62 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
| 63 |
if not isinstance(json_output["sections"], list):
|
|
@@ -81,6 +80,7 @@ def clean_and_format_output(output):
|
|
| 81 |
"output": cleaned_output
|
| 82 |
}
|
| 83 |
|
|
|
|
| 84 |
# Function to generate learning content
|
| 85 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 86 |
"""
|
|
|
|
| 34 |
def clean_and_format_output(output):
|
| 35 |
"""
|
| 36 |
Cleans, validates, and attempts to auto-repair JSON output from the AI model.
|
| 37 |
+
Handles truncated and malformed JSON gracefully.
|
| 38 |
"""
|
| 39 |
try:
|
| 40 |
# Step 1: Clean the raw output
|
| 41 |
cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
|
| 42 |
+
cleaned_output = re.sub(r'`|<s>|</s>|◀|▶', '', cleaned_output) # Remove extraneous symbols
|
| 43 |
+
cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before the first '{'
|
| 44 |
+
cleaned_output = re.sub(r'[^}]*$', '', cleaned_output) # Remove text after the last '}'
|
|
|
|
| 45 |
cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip() # Normalize whitespace
|
|
|
|
| 46 |
cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
|
| 47 |
|
| 48 |
# Step 2: Attempt to parse the cleaned output
|
| 49 |
try:
|
| 50 |
json_output = json.loads(cleaned_output)
|
| 51 |
except json.JSONDecodeError as e:
|
| 52 |
+
# Attempt to auto-repair if truncation is detected
|
| 53 |
+
if "Expecting" in str(e) or cleaned_output[-1] != "}":
|
| 54 |
+
cleaned_output += "}" # Close the JSON object
|
| 55 |
json_output = json.loads(cleaned_output) # Retry parsing
|
| 56 |
else:
|
| 57 |
raise e
|
| 58 |
|
| 59 |
+
# Step 3: Validate the required structure
|
| 60 |
if "title" not in json_output or "sections" not in json_output:
|
| 61 |
raise ValueError("Missing required keys: 'title' or 'sections'.")
|
| 62 |
if not isinstance(json_output["sections"], list):
|
|
|
|
| 80 |
"output": cleaned_output
|
| 81 |
}
|
| 82 |
|
| 83 |
+
|
| 84 |
# Function to generate learning content
|
| 85 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 86 |
"""
|