abhiimanyu committed on
Commit
dc6c5d5
·
verified ·
1 Parent(s): a0aa11b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -28
app.py CHANGED
@@ -33,35 +33,23 @@ def format_prompt(topic, description, difficulty):
33
  # Function to clean and format the AI output
34
  def clean_and_format_output(output):
35
  """
36
- Cleans and validates the output to ensure it is valid JSON.
37
  """
38
- # Remove non-ASCII characters
39
- cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output)
40
-
41
- # Remove extraneous symbols like ◀ and backticks
42
- cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output)
43
-
44
- # Remove text before the first '{' and after the last '}'
45
- cleaned_output = re.sub(r'^[^{]*', '', cleaned_output)
46
- cleaned_output = re.sub(r'[^}]*$', '', cleaned_output)
47
-
48
- # Replace improperly escaped characters (e.g., \_)
49
- cleaned_output = re.sub(r'\\_', '_', cleaned_output)
50
-
51
- # Normalize whitespace
52
- cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip()
53
-
54
- # Remove improperly escaped quotes
55
- cleaned_output = cleaned_output.replace('\\"', '"')
56
-
57
- # Remove trailing commas
58
- cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)
59
-
60
  try:
61
- # Attempt to parse the cleaned string as JSON
 
 
 
 
 
 
 
 
 
 
62
  json_output = json.loads(cleaned_output)
63
-
64
- # Check for required structure
65
  if "title" not in json_output or "sections" not in json_output:
66
  raise ValueError("Missing required keys: 'title' or 'sections'.")
67
  if not isinstance(json_output["sections"], list):
@@ -69,9 +57,18 @@ def clean_and_format_output(output):
69
  for section in json_output["sections"]:
70
  if "subheading" not in section or "content" not in section:
71
  raise ValueError("Each section must contain 'subheading' and 'content'.")
72
-
73
  return json_output
74
- except (json.JSONDecodeError, ValueError) as e:
 
 
 
 
 
 
 
 
 
75
  return {
76
  "error": "Failed to parse or validate output as JSON",
77
  "details": str(e),
 
33
  # Function to clean and format the AI output
34
  def clean_and_format_output(output):
35
  """
36
+ Cleans, validates, and attempts to parse JSON output. Handles truncation and formatting errors.
37
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  try:
39
+ # Step 1: Clean the raw output
40
+ cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output) # Remove non-ASCII characters
41
+ cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output) # Remove extraneous symbols
42
+ cleaned_output = re.sub(r'^[^{]*', '', cleaned_output) # Remove text before first '{'
43
+ cleaned_output = re.sub(r'[^}]*$', '', cleaned_output) # Remove text after last '}'
44
+ cleaned_output = re.sub(r'\\_', '_', cleaned_output) # Replace improperly escaped underscores
45
+ cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip() # Normalize whitespace
46
+ cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
47
+ cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
48
+
49
+ # Step 2: Attempt to parse as JSON
50
  json_output = json.loads(cleaned_output)
51
+
52
+ # Step 3: Validate required structure
53
  if "title" not in json_output or "sections" not in json_output:
54
  raise ValueError("Missing required keys: 'title' or 'sections'.")
55
  if not isinstance(json_output["sections"], list):
 
57
  for section in json_output["sections"]:
58
  if "subheading" not in section or "content" not in section:
59
  raise ValueError("Each section must contain 'subheading' and 'content'.")
60
+
61
  return json_output
62
+
63
+ except json.JSONDecodeError as e:
64
+ # Handle JSON decoding errors
65
+ return {
66
+ "error": "Failed to parse or validate output as JSON",
67
+ "details": f"JSON decoding error: {str(e)}",
68
+ "output": cleaned_output
69
+ }
70
+ except ValueError as e:
71
+ # Handle validation errors
72
  return {
73
  "error": "Failed to parse or validate output as JSON",
74
  "details": str(e),