abhiimanyu committed on
Commit
00bcad5
·
verified ·
1 Parent(s): ffcfed5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -70
app.py CHANGED
@@ -1,85 +1,61 @@
1
- import re
2
- import json
3
  from huggingface_hub import InferenceClient
4
  import gradio as gr
5
 
6
- # Initialize HuggingFace client
7
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
8
-
9
- # Function to format the input into a strict JSON-based prompt
 
 
 
 
 
 
 
10
  def format_prompt(topic, description, difficulty):
11
  prompt = (
12
- f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
13
  f"Use the following description as context: '{description}'. "
14
- f"The content should be suitable for a '{difficulty}' difficulty level and MUST strictly adhere to the following JSON format:\n\n"
15
  f"{{\n"
16
- f" \"title\": \"[Title of the topic]\",\n"
17
  f" \"sections\": [\n"
18
  f" {{\n"
19
- f" \"subheading\": \"[A clear subheading for the section]\",\n"
20
- f" \"content\": \"[Detailed content explaining the subheading. Write in simple, engaging language suitable for young learners.]\"\n"
21
  f" }}\n"
22
  f" ]\n"
23
  f"}}\n\n"
24
  f"### Important Guidelines:\n"
25
- f"1. Return only the JSON object and nothing else (no Markdown, code, or comments).\n"
26
- f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
27
- f"3. Ensure the \"content\" field is fully completed without truncation.\n"
28
- f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
  return prompt
31
 
32
- # Function to clean, validate, and parse JSON output
33
- def filter_and_validate_output(raw_output):
34
- """
35
- Cleans, validates, and parses the raw output from the AI model.
36
- """
37
- try:
38
- # Step 1: Clean the raw output
39
- cleaned_output = re.sub(r"</?s>", "", raw_output) # Remove </s> or <s>
40
- cleaned_output = re.sub(r"```.*?```", "", cleaned_output, flags=re.DOTALL) # Remove Markdown
41
- cleaned_output = re.sub(r"\\", "", cleaned_output) # Remove stray backslashes
42
- cleaned_output = re.sub(r"[^\x00-\x7F]+", "", cleaned_output) # Remove non-ASCII characters
43
- cleaned_output = cleaned_output.strip() # Remove leading and trailing whitespace
44
 
45
- # Step 2: Extract the JSON block
46
- # This assumes JSON starts with "{" and ends with "}"
47
- json_match = re.search(r"\{.*\}", cleaned_output, flags=re.DOTALL)
48
- if not json_match:
49
- raise ValueError("No valid JSON block found in the response.")
50
 
51
- json_string = json_match.group()
52
 
53
- # Step 3: Parse the JSON
54
- structured_content = json.loads(json_string)
55
 
56
- # Step 4: Validate required keys
57
- if "title" not in structured_content or "sections" not in structured_content:
58
- raise ValueError("Missing required keys: 'title' or 'sections'.")
59
 
60
- # Step 5: Validate sections
61
- if not isinstance(structured_content["sections"], list):
62
- raise ValueError("'sections' must be a list.")
63
- for section in structured_content["sections"]:
64
- if "subheading" not in section or "content" not in section:
65
- raise ValueError("Each section must contain 'subheading' and 'content'.")
66
-
67
- return structured_content # Return valid JSON
68
-
69
- except (json.JSONDecodeError, ValueError) as e:
70
- # Return an error message if validation fails
71
- return {
72
- "error": "Invalid JSON response",
73
- "details": str(e),
74
- "raw_output": raw_output[:500] # Include snippet for debugging
75
- }
76
-
77
-
78
- # Function to generate learning content
79
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
80
- """
81
- Generates learning content and validates the output.
82
- """
83
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
84
  top_p = float(top_p)
85
 
@@ -92,27 +68,27 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
92
  seed=42,
93
  )
94
 
95
- # Format the prompt
96
  formatted_prompt = format_prompt(topic, description, difficulty)
97
 
98
  # Stream the output from the model
99
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
100
 
101
- raw_output = ""
102
  for response in stream:
103
- raw_output += response.token.text
 
 
104
 
105
- # Use the enhanced filtering and validation function
106
- return filter_and_validate_output(raw_output)
107
-
108
-
109
- # Define the Gradio interface
110
  with gr.Blocks(theme="ocean") as demo:
111
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
112
 
113
- # Input fields for topic, description, and difficulty
114
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
115
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
 
 
116
  difficulty_input = gr.Dropdown(
117
  label="Difficulty Level",
118
  choices=["High", "Medium", "Low"],
@@ -127,7 +103,7 @@ with gr.Blocks(theme="ocean") as demo:
127
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
128
 
129
  # Output field for generated learning content
130
- output = gr.Textbox(label="Generated Learning Content", lines=15)
131
 
132
  # Button to generate content
133
  submit_button = gr.Button("Generate Learning Content")
@@ -141,4 +117,4 @@ with gr.Blocks(theme="ocean") as demo:
141
 
142
  # Launch the app
143
  if __name__ == "__main__":
144
- demo.launch()
 
 
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
 
4
+ # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
5
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
+ # Function to format the input into a suitable prompt for generating strict and engaging learning content
7
+ # def format_prompt(topic, description, difficulty):
8
+ # prompt = (
9
+ # f"You are an expert educator. Generate highly engaging and educational learning content "
10
+ # f"strictly on the topic '{topic}', with the following description: '{description}'. "
11
+ # f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
12
+ # f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
13
+ # )
14
+ # return prompt
15
  def format_prompt(topic, description, difficulty):
16
  prompt = (
17
+ f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
18
  f"Use the following description as context: '{description}'. "
19
+ f"The content should be suitable for a '{difficulty}' difficulty level and MUST strictly adhere to the following JSON structure:\n\n"
20
  f"{{\n"
21
+ f" \"title\": \"[The title of the topic]\",\n"
22
  f" \"sections\": [\n"
23
  f" {{\n"
24
+ f" \"subheading\": \"[A clear subheading summarizing the section]\",\n"
25
+ f" \"content\": \"[Detailed explanation of the section content, written in simple and engaging language.]\"\n"
26
  f" }}\n"
27
  f" ]\n"
28
  f"}}\n\n"
29
  f"### Important Guidelines:\n"
30
+ f"1. Return only a valid JSON object with no additional text, explanations, or symbols.\n"
31
+ f"2. Do not include formatting like ```json or any other delimiters.\n"
32
+ f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
33
+ f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
34
+ f"5. Do not use any extra characters like ◀ or </s>.\n"
35
+ f"### Example Output:\n"
36
+ f"{{\n"
37
+ f" \"title\": \"Understanding the Basics of Thermodynamics\",\n"
38
+ f" \"sections\": [\n"
39
+ f" {{\n"
40
+ f" \"subheading\": \"What is Thermodynamics?\",\n"
41
+ f" \"content\": \"Thermodynamics is the study of energy, heat, and how they interact. It explains phenomena like ice melting or engines running.\"\n"
42
+ f" }},\n"
43
+ f" {{\n"
44
+ f" \"subheading\": \"The Four Laws of Thermodynamics\",\n"
45
+ f" \"content\": \"The four laws govern how energy moves and changes. For example, the first law states that energy cannot be created or destroyed, only transformed.\"\n"
46
+ f" }}\n"
47
+ f" ]\n"
48
+ f"}}"
49
  )
50
  return prompt
51
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
 
 
 
 
 
53
 
 
54
 
 
 
55
 
 
 
 
56
 
57
+ # The function to generate learning content based on the inputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
 
 
 
59
  temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
60
  top_p = float(top_p)
61
 
 
68
  seed=42,
69
  )
70
 
71
+ # Format the prompt using the topic, description, and difficulty
72
  formatted_prompt = format_prompt(topic, description, difficulty)
73
 
74
  # Stream the output from the model
75
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
76
 
77
+ output = ""
78
  for response in stream:
79
+ output += response.token.text
80
+ yield output
81
+ return output
82
 
83
+ # Define the Gradio interface (inputs, generation controls, output, and submit button)
 
 
 
 
84
  with gr.Blocks(theme="ocean") as demo:
85
  gr.HTML("<h1><center>Learning Content Generator</center></h1>")
86
 
87
+ # Input fields for topic and description
88
  topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
89
  description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
90
+
91
+ # Dropdown for difficulty level (High, Medium, Low)
92
  difficulty_input = gr.Dropdown(
93
  label="Difficulty Level",
94
  choices=["High", "Medium", "Low"],
 
103
  repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
104
 
105
  # Output field for generated learning content
106
+ output = gr.Textbox(label="Generated Learning Content", lines=10)
107
 
108
  # Button to generate content
109
  submit_button = gr.Button("Generate Learning Content")
 
117
 
118
  # Launch the app
119
  if __name__ == "__main__":
120
+ demo.launch()