abhiimanyu committed on
Commit
d4befbe
·
verified ·
1 Parent(s): 00bcad5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -39
app.py CHANGED
@@ -1,17 +1,10 @@
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
 
4
- # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
5
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
- # Function to format the input into a suitable prompt for generating strict and engaging learning content
7
- # def format_prompt(topic, description, difficulty):
8
- # prompt = (
9
- # f"You are an expert educator. Generate highly engaging and educational learning content "
10
- # f"strictly on the topic '{topic}', with the following description: '{description}'. "
11
- # f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
12
- # f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
13
- # )
14
- # return prompt
15
  def format_prompt(topic, description, difficulty):
16
  prompt = (
17
  f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
@@ -32,30 +25,62 @@ def format_prompt(topic, description, difficulty):
32
  f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
33
  f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
34
  f"5. Do not use any extra characters like ◀ or </s>.\n"
35
- f"### Example Output:\n"
36
- f"{{\n"
37
- f" \"title\": \"Understanding the Basics of Thermodynamics\",\n"
38
- f" \"sections\": [\n"
39
- f" {{\n"
40
- f" \"subheading\": \"What is Thermodynamics?\",\n"
41
- f" \"content\": \"Thermodynamics is the study of energy, heat, and how they interact. It explains phenomena like ice melting or engines running.\"\n"
42
- f" }},\n"
43
- f" {{\n"
44
- f" \"subheading\": \"The Four Laws of Thermodynamics\",\n"
45
- f" \"content\": \"The four laws govern how energy moves and changes. For example, the first law states that energy cannot be created or destroyed, only transformed.\"\n"
46
- f" }}\n"
47
- f" ]\n"
48
- f"}}"
49
  )
50
  return prompt
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
-
54
-
55
-
56
-
57
- # The function to generate learning content based on the inputs
58
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
 
 
 
59
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
60
  top_p = float(top_p)
61
 
@@ -68,27 +93,26 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
68
  seed=42,
69
  )
70
 
71
- # Format the prompt using the topic, description, and difficulty
72
  formatted_prompt = format_prompt(topic, description, difficulty)
73
 
74
  # Stream the output from the model
75
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
76
 
77
- output = ""
78
  for response in stream:
79
- output += response.token.text
80
- yield output
81
- return output
82
 
83
- # Define the input fields for the topic, description, and difficulty level
 
 
 
84
  with gr.Blocks(theme="ocean") as demo:
85
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
86
 
87
- # Input fields for topic and description
88
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
89
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
90
-
91
- # Dropdown for difficulty level (High, Medium, Low)
92
  difficulty_input = gr.Dropdown(
93
  label="Difficulty Level",
94
  choices=["High", "Medium", "Low"],
@@ -103,7 +127,7 @@ with gr.Blocks(theme="ocean") as demo:
103
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
104
 
105
  # Output field for generated learning content
106
- output = gr.Textbox(label="Generated Learning Content", lines=10)
107
 
108
  # Button to generate content
109
  submit_button = gr.Button("Generate Learning Content")
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
 
4
+ # Initialize HuggingFace client
5
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
+
7
+ # Function to format the input into a strict JSON-based prompt
 
 
 
 
 
 
 
8
  def format_prompt(topic, description, difficulty):
9
  prompt = (
10
  f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
 
25
  f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
26
  f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
27
  f"5. Do not use any extra characters like ◀ or </s>.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  )
29
  return prompt
30
 
31
+ # Function to clean and format the AI output
32
def clean_and_format_output(output):
    """Extract, repair and validate the JSON document in raw model output.

    The text is stripped of generation artefacts (backticks, ``<s>``/``</s>``
    tokens, the ``◀`` marker), cut down to the span between the first ``{``
    and the last ``}``, whitespace-normalised, and cleared of trailing
    commas.  Parsing is then attempted twice: first on the text as-is, and
    only if that fails on a copy with ``\\"`` unescaped, so that valid JSON
    containing escaped quotes inside string values is not corrupted.

    Args:
        output: Raw text produced by the model.  ``None`` is treated as
            empty text; other non-string values are converted with ``str``.

    Returns:
        The parsed dict when it has a ``"title"`` key and a ``"sections"``
        list whose items are dicts with ``"subheading"`` and ``"content"``.
        Otherwise a dict with the keys ``"error"``, ``"details"`` and
        ``"output"`` (the cleaned text that failed).
    """
    # Imported locally so the function works even when the module header
    # does not import these standard-library modules.
    import json
    import re

    error_message = "Failed to parse or validate output as JSON"

    if output is None:
        output = ""
    elif not isinstance(output, str):
        output = str(output)

    # Remove generation artefacts only; legitimate non-ASCII text (accents,
    # math symbols, ...) is preserved instead of being silently dropped.
    text = re.sub(r'`|<s>|</s>|◀', '', output)

    # Keep only the span from the first '{' to the last '}'.
    start, end = text.find('{'), text.rfind('}')
    text = text[start:end + 1] if 0 <= start < end else ''

    # '\_' is a Markdown-style escape the model emits; it is invalid in JSON.
    text = text.replace('\\_', '_')

    # Collapsing whitespace also removes raw newlines inside string values,
    # which strict JSON does not allow.
    text = re.sub(r'\s+', ' ', text).strip()

    trailing_comma = re.compile(r',\s*(\}|\])')
    candidates = (
        trailing_comma.sub(r'\1', text),
        # Fallback for output where the whole document was emitted with
        # escaped quotes, e.g. {\"title\": \"...\"}.
        trailing_comma.sub(r'\1', text.replace('\\"', '"')),
    )

    failure = None
    for candidate in candidates:
        try:
            json_output = json.loads(candidate)
        except json.JSONDecodeError as exc:
            failure = {
                "error": error_message,
                "details": str(exc),
                "output": candidate,
            }
            continue

        try:
            if not isinstance(json_output, dict):
                raise ValueError("Missing required keys: 'title' or 'sections'.")
            if "title" not in json_output or "sections" not in json_output:
                raise ValueError("Missing required keys: 'title' or 'sections'.")
            if not isinstance(json_output["sections"], list):
                raise ValueError("'sections' must be a list.")
            for section in json_output["sections"]:
                # A non-dict section (string, number, ...) can never satisfy
                # the schema; reject it instead of raising TypeError or
                # accepting a substring match.
                if (
                    not isinstance(section, dict)
                    or "subheading" not in section
                    or "content" not in section
                ):
                    raise ValueError("Each section must contain 'subheading' and 'content'.")
        except ValueError as exc:
            return {
                "error": error_message,
                "details": str(exc),
                "output": candidate,
            }
        return json_output

    return failure
78
 
79
+ # Function to generate learning content
 
 
 
 
80
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
81
+ """
82
+ Generates learning content and validates the output.
83
+ """
84
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
85
  top_p = float(top_p)
86
 
 
93
  seed=42,
94
  )
95
 
96
+ # Format the prompt
97
  formatted_prompt = format_prompt(topic, description, difficulty)
98
 
99
  # Stream the output from the model
100
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
101
 
102
+ raw_output = ""
103
  for response in stream:
104
+ raw_output += response.token.text
 
 
105
 
106
+ # Clean and validate the raw output
107
+ return clean_and_format_output(raw_output)
108
+
109
+ # Define the Gradio interface
110
  with gr.Blocks(theme="ocean") as demo:
111
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
112
 
113
+ # Input fields for topic, description, and difficulty
114
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
115
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
 
 
116
  difficulty_input = gr.Dropdown(
117
  label="Difficulty Level",
118
  choices=["High", "Medium", "Low"],
 
127
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
128
 
129
  # Output field for generated learning content
130
+ output = gr.Textbox(label="Generated Learning Content", lines=15)
131
 
132
  # Button to generate content
133
  submit_button = gr.Button("Generate Learning Content")