Spaces:

abhiimanyu
/

LearningContent

Sleeping

App Files Community

abhiimanyu committed on Nov 18, 2024

Commit

00bcad5

verified ·

1 Parent(s): ffcfed5

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -70

app.py CHANGED Viewed

@@ -1,85 +1,61 @@
-import re
-import json
 from huggingface_hub import InferenceClient
 import gradio as gr
-# Initialize HuggingFace client
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-# Function to format the input into a strict JSON-based prompt
 def format_prompt(topic, description, difficulty):
     prompt = (
-        f"You are an expert educator. Generate highly engaging, educational, and structured content for the topic '{topic}'. "
         f"Use the following description as context: '{description}'. "
-        f"The content should be suitable for a '{difficulty}' difficulty level and MUST strictly adhere to the following JSON format:\n\n"
         f"{{\n"
-        f"  \"title\": \"[Title of the topic]\",\n"
         f"  \"sections\": [\n"
         f"    {{\n"
-        f"      \"subheading\": \"[A clear subheading for the section]\",\n"
-        f"      \"content\": \"[Detailed content explaining the subheading. Write in simple, engaging language suitable for young learners.]\"\n"
         f"    }}\n"
         f"  ]\n"
         f"}}\n\n"
         f"### Important Guidelines:\n"
-        f"1. Return only the JSON object and nothing else (no Markdown, code, or comments).\n"
-        f"2. Validate the JSON format. Ensure all fields are enclosed in double quotes, with no trailing commas or missing braces.\n"
-        f"3. Ensure the \"content\" field is fully completed without truncation.\n"
-        f"4. Do not include extraneous symbols, explanations, or artifacts (e.g., ◀, ```python).\n"
     )
     return prompt
-# Function to clean, validate, and parse JSON output
-def filter_and_validate_output(raw_output):
-    """
-    Cleans, validates, and parses the raw output from the AI model.
-    """
-    try:
-        # Step 1: Clean the raw output
-        cleaned_output = re.sub(r"</?s>", "", raw_output)  # Remove </s> or <s>
-        cleaned_output = re.sub(r"```.*?```", "", cleaned_output, flags=re.DOTALL)  # Remove Markdown
-        cleaned_output = re.sub(r"\\", "", cleaned_output)  # Remove stray backslashes
-        cleaned_output = re.sub(r"[^\x00-\x7F]+", "", cleaned_output)  # Remove non-ASCII characters
-        cleaned_output = cleaned_output.strip()  # Remove leading and trailing whitespace
-        # Step 2: Extract the JSON block
-        # This assumes JSON starts with "{" and ends with "}"
-        json_match = re.search(r"\{.*\}", cleaned_output, flags=re.DOTALL)
-        if not json_match:
-            raise ValueError("No valid JSON block found in the response.")
-        json_string = json_match.group()
-        # Step 3: Parse the JSON
-        structured_content = json.loads(json_string)
-        # Step 4: Validate required keys
-        if "title" not in structured_content or "sections" not in structured_content:
-            raise ValueError("Missing required keys: 'title' or 'sections'.")
-        # Step 5: Validate sections
-        if not isinstance(structured_content["sections"], list):
-            raise ValueError("'sections' must be a list.")
-        for section in structured_content["sections"]:
-            if "subheading" not in section or "content" not in section:
-                raise ValueError("Each section must contain 'subheading' and 'content'.")
-        return structured_content  # Return valid JSON
-    except (json.JSONDecodeError, ValueError) as e:
-        # Return an error message if validation fails
-        return {
-            "error": "Invalid JSON response",
-            "details": str(e),
-            "raw_output": raw_output[:500]  # Include snippet for debugging
-        }
-# Function to generate learning content
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
-    """
-    Generates learning content and validates the output.
-    """
     temperature = max(float(temperature), 1e-2)  # Ensure minimum temperature
     top_p = float(top_p)
@@ -92,27 +68,27 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
         seed=42,
     )
-    # Format the prompt
     formatted_prompt = format_prompt(topic, description, difficulty)
     # Stream the output from the model
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    raw_output = ""
     for response in stream:
-        raw_output += response.token.text
-    # Use the enhanced filtering and validation function
-    return filter_and_validate_output(raw_output)
-# Define the Gradio interface
 with gr.Blocks(theme="ocean") as demo:
     gr.HTML("<h1><center>Learning Content Generator</center></h1>")
-    # Input fields for topic, description, and difficulty
     topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
     description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
     difficulty_input = gr.Dropdown(
         label="Difficulty Level",
         choices=["High", "Medium", "Low"],
@@ -127,7 +103,7 @@ with gr.Blocks(theme="ocean") as demo:
     repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
     # Output field for generated learning content
-    output = gr.Textbox(label="Generated Learning Content", lines=15)
     # Button to generate content
     submit_button = gr.Button("Generate Learning Content")
@@ -141,4 +117,4 @@ with gr.Blocks(theme="ocean") as demo:
 # Launch the app
 if __name__ == "__main__":
-    demo.launch()

 from huggingface_hub import InferenceClient
 import gradio as gr
+# client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# Function to format the input into a suitable prompt for generating strict and engaging learning content
+# def format_prompt(topic, description, difficulty):
+#     prompt = (
+#         f"You are an expert educator. Generate highly engaging and educational learning content "
+#         f"strictly on the topic '{topic}', with the following description: '{description}'. "
+#         f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
+#         f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
+#     )
+#     return prompt
 def format_prompt(topic, description, difficulty):
     prompt = (
+        f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
         f"Use the following description as context: '{description}'. "
+        f"The content should be suitable for a '{difficulty}' difficulty level and MUST strictly adhere to the following JSON structure:\n\n"
         f"{{\n"
+        f"  \"title\": \"[The title of the topic]\",\n"
         f"  \"sections\": [\n"
         f"    {{\n"
+        f"      \"subheading\": \"[A clear subheading summarizing the section]\",\n"
+        f"      \"content\": \"[Detailed explanation of the section content, written in simple and engaging language.]\"\n"
         f"    }}\n"
         f"  ]\n"
         f"}}\n\n"
         f"### Important Guidelines:\n"
+        f"1. Return only a valid JSON object with no additional text, explanations, or symbols.\n"
+        f"2. Do not include formatting like ```json or any other delimiters.\n"
+        f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
+        f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
+        f"5. Do not use any extra characters like ◀ or </s>.\n"
+        f"### Example Output:\n"
+        f"{{\n"
+        f"  \"title\": \"Understanding the Basics of Thermodynamics\",\n"
+        f"  \"sections\": [\n"
+        f"    {{\n"
+        f"      \"subheading\": \"What is Thermodynamics?\",\n"
+        f"      \"content\": \"Thermodynamics is the study of energy, heat, and how they interact. It explains phenomena like ice melting or engines running.\"\n"
+        f"    }},\n"
+        f"    {{\n"
+        f"      \"subheading\": \"The Four Laws of Thermodynamics\",\n"
+        f"      \"content\": \"The four laws govern how energy moves and changes. For example, the first law states that energy cannot be created or destroyed, only transformed.\"\n"
+        f"    }}\n"
+        f"  ]\n"
+        f"}}"
     )
     return prompt
+# The function to generate learning content based on the inputs
 def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
     temperature = max(float(temperature), 1e-2)  # Ensure minimum temperature
     top_p = float(top_p)
         seed=42,
     )
+    # Format the prompt using the topic, description, and difficulty
     formatted_prompt = format_prompt(topic, description, difficulty)
     # Stream the output from the model
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
     for response in stream:
+        output += response.token.text
+        yield output
+    return output
+# Define the Gradio interface: input fields for the topic, description, and difficulty level, plus the generation controls and output
 with gr.Blocks(theme="ocean") as demo:
     gr.HTML("<h1><center>Learning Content Generator</center></h1>")
+    # Input fields for topic and description
     topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
     description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
+    # Dropdown for difficulty level (High, Medium, Low)
     difficulty_input = gr.Dropdown(
         label="Difficulty Level",
         choices=["High", "Medium", "Low"],
     repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
     # Output field for generated learning content
+    output = gr.Textbox(label="Generated Learning Content", lines=10)
     # Button to generate content
     submit_button = gr.Button("Generate Learning Content")
 # Launch the app
 if __name__ == "__main__":
+    demo.launch()