Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,10 @@
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
-
#
|
| 5 |
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 6 |
-
|
| 7 |
-
#
|
| 8 |
-
# prompt = (
|
| 9 |
-
# f"You are an expert educator. Generate highly engaging and educational learning content "
|
| 10 |
-
# f"strictly on the topic '{topic}', with the following description: '{description}'. "
|
| 11 |
-
# f"The content should be suitable for a '{difficulty}' difficulty level, and it should be presented in a way that helps readers clearly understand the key points. "
|
| 12 |
-
# f"Please provide the content in paragraph form, ensuring it is both informative and interesting for the learner."
|
| 13 |
-
# )
|
| 14 |
-
# return prompt
|
| 15 |
def format_prompt(topic, description, difficulty):
|
| 16 |
prompt = (
|
| 17 |
f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
|
|
@@ -32,30 +25,62 @@ def format_prompt(topic, description, difficulty):
|
|
| 32 |
f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
|
| 33 |
f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
|
| 34 |
f"5. Do not use any extra characters like ◀ or </s>.\n"
|
| 35 |
-
f"### Example Output:\n"
|
| 36 |
-
f"{{\n"
|
| 37 |
-
f" \"title\": \"Understanding the Basics of Thermodynamics\",\n"
|
| 38 |
-
f" \"sections\": [\n"
|
| 39 |
-
f" {{\n"
|
| 40 |
-
f" \"subheading\": \"What is Thermodynamics?\",\n"
|
| 41 |
-
f" \"content\": \"Thermodynamics is the study of energy, heat, and how they interact. It explains phenomena like ice melting or engines running.\"\n"
|
| 42 |
-
f" }},\n"
|
| 43 |
-
f" {{\n"
|
| 44 |
-
f" \"subheading\": \"The Four Laws of Thermodynamics\",\n"
|
| 45 |
-
f" \"content\": \"The four laws govern how energy moves and changes. For example, the first law states that energy cannot be created or destroyed, only transformed.\"\n"
|
| 46 |
-
f" }}\n"
|
| 47 |
-
f" ]\n"
|
| 48 |
-
f"}}"
|
| 49 |
)
|
| 50 |
return prompt
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
# The function to generate learning content based on the inputs
|
| 58 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
|
|
|
|
|
|
|
|
|
| 59 |
temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
|
| 60 |
top_p = float(top_p)
|
| 61 |
|
|
@@ -68,27 +93,26 @@ def generate_learning_content(topic, description, difficulty, temperature=0.9, m
|
|
| 68 |
seed=42,
|
| 69 |
)
|
| 70 |
|
| 71 |
-
# Format the prompt
|
| 72 |
formatted_prompt = format_prompt(topic, description, difficulty)
|
| 73 |
|
| 74 |
# Stream the output from the model
|
| 75 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
| 76 |
|
| 77 |
-
|
| 78 |
for response in stream:
|
| 79 |
-
|
| 80 |
-
yield output
|
| 81 |
-
return output
|
| 82 |
|
| 83 |
-
#
|
|
|
|
|
|
|
|
|
|
| 84 |
with gr.Blocks(theme="ocean") as demo:
|
| 85 |
gr.HTML("<h1><center>Learning Content Generator</center></h1>")
|
| 86 |
|
| 87 |
-
# Input fields for topic and
|
| 88 |
topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
|
| 89 |
description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
|
| 90 |
-
|
| 91 |
-
# Dropdown for difficulty level (High, Medium, Low)
|
| 92 |
difficulty_input = gr.Dropdown(
|
| 93 |
label="Difficulty Level",
|
| 94 |
choices=["High", "Medium", "Low"],
|
|
@@ -103,7 +127,7 @@ with gr.Blocks(theme="ocean") as demo:
|
|
| 103 |
repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
|
| 104 |
|
| 105 |
# Output field for generated learning content
|
| 106 |
-
output = gr.Textbox(label="Generated Learning Content", lines=
|
| 107 |
|
| 108 |
# Button to generate content
|
| 109 |
submit_button = gr.Button("Generate Learning Content")
|
|
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
+
# Initialize HuggingFace client
|
| 5 |
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 6 |
+
|
| 7 |
+
# Function to format the input into a strict JSON-based prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def format_prompt(topic, description, difficulty):
|
| 9 |
prompt = (
|
| 10 |
f"You are an expert educator. Generate highly engaging, educational, and structured content on the topic '{topic}'. "
|
|
|
|
| 25 |
f"3. Ensure all keys and values are properly enclosed in double quotes.\n"
|
| 26 |
f"4. Validate the JSON before returning it to ensure it is syntactically correct and complete.\n"
|
| 27 |
f"5. Do not use any extra characters like ◀ or </s>.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
)
|
| 29 |
return prompt
|
| 30 |
|
| 31 |
+
# Function to clean and format the AI output
|
| 32 |
+
def clean_and_format_output(output):
    """Clean raw model text and validate it as the expected JSON structure.

    Strips stray characters and markup the model tends to emit, isolates the
    outermost JSON object, then parses it and checks it has the required
    'title' / 'sections' shape.

    Args:
        output: Raw text streamed from the model.

    Returns:
        The parsed JSON dict on success, or an error dict with keys
        'error', 'details', and 'output' when parsing or validation fails.
    """
    # BUGFIX: `re` and `json` are used below but are never imported at the
    # top of this file (only huggingface_hub and gradio are) — without these
    # local imports this function raises NameError at runtime.
    import json
    import re

    # Remove non-ASCII characters
    cleaned_output = re.sub(r'[^\x00-\x7F]+', '', output)

    # Remove extraneous symbols like backticks and sentinel tokens
    # (◀ is already gone after the ASCII pass; kept for parity with intent)
    cleaned_output = re.sub(r'`|<s>|</s>|◀', '', cleaned_output)

    # Keep only the span from the first '{' through the last '}'
    cleaned_output = re.sub(r'^[^{]*', '', cleaned_output)
    cleaned_output = re.sub(r'[^}]*$', '', cleaned_output)

    # Replace improperly escaped characters (e.g. \_)
    cleaned_output = re.sub(r'\\_', '_', cleaned_output)

    # Normalize whitespace (collapse runs to single spaces)
    cleaned_output = re.sub(r'\s+', ' ', cleaned_output).strip()

    # Remove improperly escaped quotes
    cleaned_output = cleaned_output.replace('\\"', '"')

    # Remove trailing commas before closing braces/brackets
    cleaned_output = re.sub(r',\s*(\}|\])', r'\1', cleaned_output)

    try:
        # Attempt to parse the cleaned string as JSON
        json_output = json.loads(cleaned_output)

        # Check for required structure: a title plus a list of sections,
        # each section carrying 'subheading' and 'content'.
        if "title" not in json_output or "sections" not in json_output:
            raise ValueError("Missing required keys: 'title' or 'sections'.")
        if not isinstance(json_output["sections"], list):
            raise ValueError("'sections' must be a list.")
        for section in json_output["sections"]:
            if "subheading" not in section or "content" not in section:
                raise ValueError("Each section must contain 'subheading' and 'content'.")

        return json_output
    except (json.JSONDecodeError, ValueError) as e:
        return {
            "error": "Failed to parse or validate output as JSON",
            "details": str(e),
            "output": cleaned_output
        }
|
| 78 |
|
| 79 |
+
# Function to generate learning content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def generate_learning_content(topic, description, difficulty, temperature=0.9, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.2):
|
| 81 |
+
"""
|
| 82 |
+
Generates learning content and validates the output.
|
| 83 |
+
"""
|
| 84 |
temperature = max(float(temperature), 1e-2) # Ensure minimum temperature
|
| 85 |
top_p = float(top_p)
|
| 86 |
|
|
|
|
| 93 |
seed=42,
|
| 94 |
)
|
| 95 |
|
| 96 |
+
# Format the prompt
|
| 97 |
formatted_prompt = format_prompt(topic, description, difficulty)
|
| 98 |
|
| 99 |
# Stream the output from the model
|
| 100 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
| 101 |
|
| 102 |
+
raw_output = ""
|
| 103 |
for response in stream:
|
| 104 |
+
raw_output += response.token.text
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
# Clean and validate the raw output
|
| 107 |
+
return clean_and_format_output(raw_output)
|
| 108 |
+
|
| 109 |
+
# Define the Gradio interface
|
| 110 |
with gr.Blocks(theme="ocean") as demo:
|
| 111 |
gr.HTML("<h1><center>Learning Content Generator</center></h1>")
|
| 112 |
|
| 113 |
+
# Input fields for topic, description, and difficulty
|
| 114 |
topic_input = gr.Textbox(label="Topic", placeholder="Enter the topic for learning content.")
|
| 115 |
description_input = gr.Textbox(label="Description", placeholder="Enter a brief description of the topic.")
|
|
|
|
|
|
|
| 116 |
difficulty_input = gr.Dropdown(
|
| 117 |
label="Difficulty Level",
|
| 118 |
choices=["High", "Medium", "Low"],
|
|
|
|
| 127 |
repetition_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.2, label="Repetition penalty")
|
| 128 |
|
| 129 |
# Output field for generated learning content
|
| 130 |
+
output = gr.Textbox(label="Generated Learning Content", lines=15)
|
| 131 |
|
| 132 |
# Button to generate content
|
| 133 |
submit_button = gr.Button("Generate Learning Content")
|