abhiimanyu committed on
Commit
c2cc992
·
verified ·
1 Parent(s): f154ffe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -21
app.py CHANGED
@@ -46,22 +46,12 @@ def clean_and_format_learning_content(output):
46
  cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
47
  cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
48
 
49
- # Step 2: Fix the 'sections' structure
50
- # Ensure 'sections' is an array, not a single object
51
- sections_pattern = r'"sections":\s*{'
52
- if re.search(sections_pattern, cleaned_output):
53
- # Replace the invalid opening brace with an array opening
54
- cleaned_output = re.sub(sections_pattern, '"sections": [ {', cleaned_output)
55
- # Add a closing array bracket at the end of 'sections'
56
- cleaned_output = re.sub(r'(}\s*,?\s*)]}', r'} ] }', cleaned_output)
57
-
58
- # Step 3: Remove invalid syntax in 'content' fields
59
- cleaned_output = re.sub(r'\[.*?\]\(.*?\)', '', cleaned_output) # Remove Markdown-like links
60
-
61
- # Step 4: Fix missing commas between JSON objects
62
- cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
63
 
64
- # Step 5: Fix unbalanced brackets and braces
65
  open_braces = cleaned_output.count('{')
66
  close_braces = cleaned_output.count('}')
67
  open_brackets = cleaned_output.count('[')
@@ -71,18 +61,23 @@ def clean_and_format_learning_content(output):
71
  if open_brackets > close_brackets:
72
  cleaned_output += ']' * (open_brackets - close_brackets)
73
 
74
- # Step 6: Attempt to parse JSON
 
 
 
75
  json_output = json.loads(cleaned_output)
76
 
77
- # Step 7: Validate JSON structure
78
  required_keys = ["title", "sections"]
79
  if "title" not in json_output or "sections" not in json_output:
80
  raise ValueError("Missing required keys: 'title' or 'sections'.")
81
  if not isinstance(json_output["sections"], list):
82
- raise ValueError("'sections' must be a list.")
83
- for section in json_output["sections"]:
84
- if "subheading" not in section or "content" not in section:
85
- raise ValueError("Each section must contain 'subheading' and 'content'.")
 
 
86
 
87
  return json_output
88
 
@@ -94,6 +89,7 @@ def clean_and_format_learning_content(output):
94
  "output": cleaned_output
95
  }
96
 
 
97
  # Function to generate learning content
98
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
99
  """
 
46
  cleaned_output = cleaned_output.replace('\\"', '"') # Fix improperly escaped quotes
47
  cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output) # Remove trailing commas
48
 
49
+ # Step 2: Fix invalid 'sections' fields
50
+ # Replace invalid sections (e.g., sections:) with an empty array
51
+ if re.search(r'"sections":\s*,', cleaned_output):
52
+ cleaned_output = re.sub(r'"sections":\s*,', '"sections": []', cleaned_output)
 
 
 
 
 
 
 
 
 
 
53
 
54
+ # Fix unbalanced brackets or braces
55
  open_braces = cleaned_output.count('{')
56
  close_braces = cleaned_output.count('}')
57
  open_brackets = cleaned_output.count('[')
 
61
  if open_brackets > close_brackets:
62
  cleaned_output += ']' * (open_brackets - close_brackets)
63
 
64
+ # Fix commas between objects in arrays
65
+ cleaned_output = re.sub(r'(\})(\s*{)', r'\1,\2', cleaned_output)
66
+
67
+ # Step 3: Attempt to parse JSON
68
  json_output = json.loads(cleaned_output)
69
 
70
+ # Step 4: Validate JSON structure
71
  required_keys = ["title", "sections"]
72
  if "title" not in json_output or "sections" not in json_output:
73
  raise ValueError("Missing required keys: 'title' or 'sections'.")
74
  if not isinstance(json_output["sections"], list):
75
+ # If 'sections' is not a list, replace it with an empty list
76
+ json_output["sections"] = []
77
+ else:
78
+ for section in json_output["sections"]:
79
+ if "subheading" not in section or "content" not in section:
80
+ raise ValueError("Each section must contain 'subheading' and 'content'.")
81
 
82
  return json_output
83
 
 
89
  "output": cleaned_output
90
  }
91
 
92
+
93
  # Function to generate learning content
94
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
95
  """