Spaces:

abhiimanyu
/

LearningContent

Sleeping

App Files Files Community

abhiimanyu committed on Nov 18, 2024

Commit

4617aa5

verified ·

1 Parent(s): f2cb135

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -35

app.py CHANGED Viewed

@@ -1,17 +1,12 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
-# client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-# Function to format the input into a suitable prompt for generating strict and engaging learning content
-# def format_prompt(topic, description, difficulty):
-#     prompt = (
-#         f"You are an expert educator. Generate highly engaging and educational learning content "
-#         f"strictly on the topic '{topic}', with the following description: '{description}'. "
-#         f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
-#         f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
-#     )
-#     return prompt
 def format_prompt(topic, description, difficulty):
     prompt = (
         f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
@@ -31,25 +26,49 @@ def format_prompt(topic, description, difficulty):
         f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
         f"3. Ensure the \"content\" field is fully completed without truncation.\n"
         f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
-        f"5. Example output:\n"
-        f"{{\n"
-        f"  \"title\": \"Understanding Thermodynamics\",\n"
-        f"  \"sections\": [\n"
-        f"    {{\n"
-        f"      \"subheading\": \"What is Thermodynamics?\",\n"
-        f"      \"content\": \"Thermodynamics is the study of energy, heat, and how they work together. It explains everyday phenomena, like why ice melts in heat.\"\n"
-        f"    }},\n"
-        f"    {{\n"
-        f"      \"subheading\": \"The First Law of Thermodynamics\",\n"
-        f"      \"content\": \"The First Law states that energy cannot be created or destroyed. It can only change from one form to another, such as heat turning into motion.\"\n"
-        f"    }}\n"
-        f"  ]\n"
-        f"}}"
     )
     return prompt
-# The function to generate learning content based on the inputs
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     temperature = max(float(temperature), 1e-2)  # Ensure minimum temperature
     top_p = float(top_p)
@@ -62,27 +81,26 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
         seed=42,
     )
-    # Format the prompt using the topic, description, and difficulty
     formatted_prompt = format_prompt(topic, description, difficulty)
     # Stream the output from the model
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
     for response in stream:
-        output += response.token.text
-        yield output
-    return output
-# Define the input fields for the topic, description, and difficulty level
 with gr.Blocks(theme="ocean") as demo:
     gr.HTML("<h1><center>Learning Content Generator</center></h1>")
-    # Input fields for topic and description
     topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
     description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
-    # Dropdown for difficulty level (High, Medium, Low)
     difficulty_input = gr.Dropdown(
         label="Difficulty Level",
         choices=["High", "Medium", "Low"],
@@ -97,7 +115,7 @@ with gr.Blocks(theme="ocean") as demo:
     repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
     # Output field for generated learning content
-    output = gr.Textbox(label="Generated Learning Content", lines=10)
     # Button to generate content
     submit_button = gr.Button("Generate Learning Content")

+import re
+import json
 from huggingface_hub import InferenceClient
 import gradio as gr
+# Initialize HuggingFace client
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# Function to format the input into a strict JSON-based prompt
 def format_prompt(topic, description, difficulty):
     prompt = (
         f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
         f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
         f"3. Ensure the \"content\" field is fully completed without truncation.\n"
         f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
     )
     return prompt
+# Function to clean, validate, and parse JSON output
def filter_and_validate_output(raw_output):
    """
    Cleans, validates, and parses the raw output from the AI model.

    The text is stripped of <s>/</s> markers, then parsed as JSON. Text
    outside Markdown code fences is tried first; if that does not parse,
    the content of each fenced block (e.g. ```json ... ```) is tried in
    order, so a reply that wraps its JSON in a fence is still accepted.

    Args:
        raw_output: Text produced by the model.

    Returns:
        The parsed dict when it has a "title" and a "sections" list whose
        items are dicts containing "subheading" and "content". Otherwise a
        dict with the keys "error", "details", and "raw_output" (the first
        500 characters of the input, for debugging).
    """
    # Build the debugging snippet up front so the error path can never fail,
    # even when the input is not a string.
    if isinstance(raw_output, str):
        raw_snippet = raw_output[:500]
    else:
        raw_snippet = repr(raw_output)[:500]

    try:
        if not isinstance(raw_output, str):
            raise ValueError("Model output must be a string.")

        # Step 1: Clean the output
        text = re.sub(r"</?s>", "", raw_output)  # Remove unwanted symbols like </s>

        # Candidate 1: everything outside Markdown blocks.
        # Further candidates: the body of each fenced block, without the
        # fence markers and the optional language tag.
        candidates = [re.sub(r"```.*?```", "", text, flags=re.DOTALL)]
        candidates.extend(
            re.findall(r"```[\w+-]*\s*(.*?)```", text, flags=re.DOTALL)
        )

        # Step 2: Parse the JSON, taking the first candidate that decodes
        first_error = None
        for candidate in candidates:
            try:
                structured_content = json.loads(candidate.strip())
                break
            except json.JSONDecodeError as exc:
                if first_error is None:
                    first_error = exc
        else:
            # Nothing decoded: report the failure of the primary candidate.
            raise first_error

        # Step 3: Validate required keys
        if not isinstance(structured_content, dict):
            raise ValueError("Top-level JSON value must be an object.")
        if "title" not in structured_content or "sections" not in structured_content:
            raise ValueError("Missing required keys: 'title' or 'sections'.")

        # Step 4: Validate sections
        sections = structured_content["sections"]
        if not isinstance(sections, list):
            raise ValueError("'sections' must be a list.")
        for section in sections:
            # The dict check prevents a plain string from passing the key
            # tests below through substring matching.
            if (
                not isinstance(section, dict)
                or "subheading" not in section
                or "content" not in section
            ):
                raise ValueError("Each section must contain 'subheading' and 'content'.")

        return structured_content  # Valid JSON
    except (json.JSONDecodeError, ValueError) as e:
        # Return an error message if validation fails
        return {
            "error": "Invalid JSON response",
            "details": str(e),
            "raw_output": raw_snippet,  # Include snippet for debugging
        }
+# Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
+    """
+    Generates learning content and validates the output.
+    """
     temperature = max(float(temperature), 1e-2)  # Ensure minimum temperature
     top_p = float(top_p)
         seed=42,
     )
+    # Format the prompt
     formatted_prompt = format_prompt(topic, description, difficulty)
     # Stream the output from the model
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    raw_output = ""
     for response in stream:
+        raw_output += response.token.text
+    # Filter and validate the JSON output
+    return filter_and_validate_output(raw_output)
+# Define the Gradio interface
 with gr.Blocks(theme="ocean") as demo:
     gr.HTML("<h1><center>Learning Content Generator</center></h1>")
+    # Input fields for topic, description, and difficulty
     topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
     description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
     difficulty_input = gr.Dropdown(
         label="Difficulty Level",
         choices=["High", "Medium", "Low"],
     repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
     # Output field for generated learning content
+    output = gr.Textbox(label="Generated Learning Content", lines=15)
     # Button to generate content
     submit_button = gr.Button("Generate Learning Content")