abhiimanyu committed on
Commit
4617aa5
·
verified ·
1 Parent(s): f2cb135

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -35
app.py CHANGED
@@ -1,17 +1,12 @@
 
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
 
4
- # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
5
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
- # Function to format the input into a suitable prompt for generating strict and engaging learning content
7
- # def format_prompt(topic, description, difficulty):
8
- # prompt = (
9
- # f"You are an expert educator. Generate highly engaging and educational learning content "
10
- # f"strictly on the topic '{topic}', with the following description: '{description}'. "
11
- # f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
12
- # f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
13
- # )
14
- # return prompt
15
  def format_prompt(topic, description, difficulty):
16
  prompt = (
17
  f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
@@ -31,25 +26,49 @@ def format_prompt(topic, description, difficulty):
31
  f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
32
  f"3. Ensure the \"content\" field is fully completed without truncation.\n"
33
  f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
34
- f"5. Example output:\n"
35
- f"{{\n"
36
- f" \"title\": \"Understanding Thermodynamics\",\n"
37
- f" \"sections\": [\n"
38
- f" {{\n"
39
- f" \"subheading\": \"What is Thermodynamics?\",\n"
40
- f" \"content\": \"Thermodynamics is the study of energy, heat, and how they work together. It explains everyday phenomena, like why ice melts in heat.\"\n"
41
- f" }},\n"
42
- f" {{\n"
43
- f" \"subheading\": \"The First Law of Thermodynamics\",\n"
44
- f" \"content\": \"The First Law states that energy cannot be created or destroyed. It can only change from one form to another, such as heat turning into motion.\"\n"
45
- f" }}\n"
46
- f" ]\n"
47
- f"}}"
48
  )
49
  return prompt
50
 
51
- # The function to generate learning content based on the inputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
 
 
 
53
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
54
  top_p = float(top_p)
55
 
@@ -62,27 +81,26 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
62
  seed=42,
63
  )
64
 
65
- # Format the prompt using the topic, description, and difficulty
66
  formatted_prompt = format_prompt(topic, description, difficulty)
67
 
68
  # Stream the output from the model
69
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
70
 
71
- output = ""
72
  for response in stream:
73
- output += response.token.text
74
- yield output
75
- return output
 
76
 
77
- # Define the input fields for the topic, description, and difficulty level
78
  with gr.Blocks(theme="ocean") as demo:
79
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
80
 
81
- # Input fields for topic and description
82
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
83
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
84
-
85
- # Dropdown for difficulty level (High, Medium, Low)
86
  difficulty_input = gr.Dropdown(
87
  label="Difficulty Level",
88
  choices=["High", "Medium", "Low"],
@@ -97,7 +115,7 @@ with gr.Blocks(theme="ocean") as demo:
97
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
98
 
99
  # Output field for generated learning content
100
- output = gr.Textbox(label="Generated Learning Content", lines=10)
101
 
102
  # Button to generate content
103
  submit_button = gr.Button("Generate Learning Content")
 
1
+ import re
2
+ import json
3
  from huggingface_hub import InferenceClient
4
  import gradio as gr
5
 
6
+ # Initialize HuggingFace client
7
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
8
+
9
+ # Function to format the input into a strict JSON-based prompt
 
 
 
 
 
 
 
10
  def format_prompt(topic, description, difficulty):
11
  prompt = (
12
  f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
 
26
  f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
27
  f"3. Ensure the \"content\" field is fully completed without truncation.\n"
28
  f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
  return prompt
31
 
32
+ # Function to clean, validate, and parse JSON output
33
def _validate_learning_content(structured_content):
    """Raise ValueError unless *structured_content* has the expected shape.

    The expected shape is a JSON object with a "title" key and a "sections"
    key, where "sections" is a list of objects that each carry "subheading"
    and "content".
    """
    # A bare number/string/list is valid JSON but not a usable document;
    # checking the type first also prevents `in` from doing a substring
    # test on a string or raising TypeError on a number.
    if not isinstance(structured_content, dict):
        raise ValueError("Top-level JSON value must be an object.")

    if "title" not in structured_content or "sections" not in structured_content:
        raise ValueError("Missing required keys: 'title' or 'sections'.")

    sections = structured_content["sections"]
    if not isinstance(sections, list):
        raise ValueError("'sections' must be a list.")

    for section in sections:
        if (
            not isinstance(section, dict)
            or "subheading" not in section
            or "content" not in section
        ):
            raise ValueError("Each section must contain 'subheading' and 'content'.")


def filter_and_validate_output(raw_output):
    """
    Cleans, validates, and parses the raw output from the AI model.

    Args:
        raw_output: Text produced by the model. ``None`` is treated as an
            empty string.

    Returns:
        The parsed dict (with "title" and "sections") when the output is
        valid, otherwise a dict with the keys "error", "details" and
        "raw_output" (the first 500 characters of the input, for debugging).
    """
    raw_text = "" if raw_output is None else str(raw_output)

    # Step 1: Clean the output
    cleaned_output = re.sub(r"</?s>", "", raw_text)  # Remove unwanted symbols like </s>

    # Step 2: Build the list of JSON candidates.
    # Models frequently wrap the JSON itself in a Markdown fence
    # (```json ... ```), so the fenced payloads are tried first. Deleting
    # fenced blocks outright would discard the very content we want.
    candidates = [
        payload.strip()
        for payload in re.findall(
            r"```[A-Za-z0-9_+-]*\s*(.*?)```", cleaned_output, flags=re.DOTALL
        )
    ]
    # Fallback: the text with fenced blocks removed entirely, for the case
    # where the fence holds unrelated material and the JSON sits outside it.
    candidates.append(
        re.sub(r"```.*?```", "", cleaned_output, flags=re.DOTALL).strip()
    )

    # Step 3: Return the first candidate that parses and validates.
    last_error = None
    for candidate in candidates:
        try:
            structured_content = json.loads(candidate)
            _validate_learning_content(structured_content)
        except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
            last_error = e
            continue
        return structured_content  # Valid JSON

    # Return an error message if validation fails
    return {
        "error": "Invalid JSON response",
        "details": str(last_error),
        "raw_output": raw_text[:500],  # Include snippet for debugging
    }
66
+
67
+ # Function to generate learning content
68
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
69
+ """
70
+ Generates learning content and validates the output.
71
+ """
72
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
73
  top_p = float(top_p)
74
 
 
81
  seed=42,
82
  )
83
 
84
+ # Format the prompt
85
  formatted_prompt = format_prompt(topic, description, difficulty)
86
 
87
  # Stream the output from the model
88
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
89
 
90
+ raw_output = ""
91
  for response in stream:
92
+ raw_output += response.token.text
93
+
94
+ # Filter and validate the JSON output
95
+ return filter_and_validate_output(raw_output)
96
 
97
+ # Define the Gradio interface
98
  with gr.Blocks(theme="ocean") as demo:
99
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
100
 
101
+ # Input fields for topic, description, and difficulty
102
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
103
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
 
 
104
  difficulty_input = gr.Dropdown(
105
  label="Difficulty Level",
106
  choices=["High", "Medium", "Low"],
 
115
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
116
 
117
  # Output field for generated learning content
118
+ output = gr.Textbox(label="Generated Learning Content", lines=15)
119
 
120
  # Button to generate content
121
  submit_button = gr.Button("Generate Learning Content")