abhiimanyu committed on
Commit
3668272
·
verified ·
1 Parent(s): 3f1d78e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -34,30 +34,29 @@ def format_prompt(topic, description, difficulty):
34
  def clean_and_format_output(output):
35
  """
36
  Cleans, validates, and attempts to auto-repair JSON output from the AI model.
 
37
  """
38
  try:
39
  # Step 1: Clean the raw output
40
  cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
41
- cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output) # Remove extraneous symbols
42
- cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before first '{'
43
- cleaned_output = re.sub(r'[^}]*$', '', cleaned_output) # Remove text after last '}'
44
- cleaned_output = re.sub(r'\\_', '_', cleaned_output) # Replace improperly escaped underscores
45
  cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip() # Normalize whitespace
46
- cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
47
  cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
48
 
49
  # Step 2: Attempt to parse the cleaned output
50
  try:
51
  json_output = json.loads(cleaned_output)
52
  except json.JSONDecodeError as e:
53
- # Attempt to auto-repair truncated JSON
54
- if "Expecting ',' delimiter" in str(e):
55
- cleaned_output += "]}" # Add missing array and object closures
56
  json_output = json.loads(cleaned_output) # Retry parsing
57
  else:
58
  raise e
59
 
60
- # Step 3: Validate required structure
61
  if "title" not in json_output or "sections" not in json_output:
62
  raise ValueError("Missing required keys: 'title' or 'sections'.")
63
  if not isinstance(json_output["sections"], list):
@@ -81,6 +80,7 @@ def clean_and_format_output(output):
81
  "output": cleaned_output
82
  }
83
 
 
84
  # Function to generate learning content
85
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
86
  """
 
34
  def clean_and_format_output(output):
35
  """
36
  Cleans, validates, and attempts to auto-repair JSON output from the AI model.
37
+ Handles truncated and malformed JSON gracefully.
38
  """
39
  try:
40
  # Step 1: Clean the raw output
41
  cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
42
+ cleaned_output = re.sub(r'`|<s>|</s>|◀|▶', '', cleaned_output) # Remove extraneous symbols
43
+ cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before the first '{'
44
+ cleaned_output = re.sub(r'[^}]*$', '', cleaned_output) # Remove text after the last '}'
 
45
  cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip() # Normalize whitespace
 
46
  cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
47
 
48
  # Step 2: Attempt to parse the cleaned output
49
  try:
50
  json_output = json.loads(cleaned_output)
51
  except json.JSONDecodeError as e:
52
+ # Attempt to auto-repair if truncation is detected
53
+ if "Expecting" in str(e) or cleaned_output[-1] != "}":
54
+ cleaned_output += "}" # Close the JSON object
55
  json_output = json.loads(cleaned_output) # Retry parsing
56
  else:
57
  raise e
58
 
59
+ # Step 3: Validate the required structure
60
  if "title" not in json_output or "sections" not in json_output:
61
  raise ValueError("Missing required keys: 'title' or 'sections'.")
62
  if not isinstance(json_output["sections"], list):
 
80
  "output": cleaned_output
81
  }
82
 
83
+
84
  # Function to generate learning content
85
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
86
  """