Spaces:
Sleeping
Sleeping
| import requests | |
| import json | |
| import streamlit as st | |
| from haystack import Pipeline | |
| from haystack.components.converters import HTMLToDocument | |
| from haystack.components.fetchers import LinkContentFetcher | |
| #from haystack.components.builders import PromptBuilder | |
| #from haystack.components.generators import HuggingFaceAPIGenerator | |
| from haystack.components.generators.chat import HuggingFaceAPIChatGenerator | |
| from haystack.components.builders import ChatPromptBuilder | |
| #from haystack.components.builders import DynamicChatPromptBuilder | |
| from haystack.utils import Secret | |
| #from dotenv import load_dotenv | |
| import os | |
| #load_dotenv() # Load environment variables from a .env file | |
| HUGGINGFACE_API_KEY = os.getenv('HUGGINGFACE_API_KEY') | |
# Define your quiz generation template.
# Jinja-style chat template consumed by ChatPromptBuilder: it renders a single
# user message asking the model for exactly 4 multiple-choice / true-false
# questions. The embedded JSON example defines the schema that generate_quiz()
# later parses, and documents are truncated to ~3800 chars to fit the context
# window. NOTE: this string is runtime behavior (the LLM prompt) — do not
# reword it casually.
quiz_generation_template = """
{% message role="user" %}
Given the following text, create only 4 multiple-choice or true-false questions in JSON format randomly.
The options should be unambiguous.
For multiple-choice questions, each option should begin with a letter followed by a period and a space (e.g., "a. option").
For true-false questions, there should be only two options that is ('true','false').
The question should also briefly mention the general topic of the text so that it can be understood in isolation.
Include challenging questions that require reasoning.
Respond with JSON only, no markdown or descriptions.
Example JSON format you should absolutely follow:
{
"questions": [
{
"question": "text of the question",
"options": ["a. 1st option", "b. 2nd option", "c. 3rd option", "d. 4th option"],
"right_option": "c"
}
]
}
IMPORTANT: Do not write anything else and stop generating after once!!!
text:
{% for doc in documents %}{{ doc.content|truncate(3800) }}{% endfor %}
{% endmessage %}
"""
def generate_quiz_pipeline():
    """Build the Haystack pipeline that turns a web page into quiz JSON.

    Stages (connected in order): fetch the URL -> convert HTML to Documents
    -> render the quiz chat prompt -> query the hosted chat model.

    Returns:
        Pipeline: a fully connected, ready-to-run quiz-generation pipeline.

    Raises:
        ValueError: if the HUGGINGFACE_API_KEY environment variable is unset,
            so misconfiguration fails loudly here instead of as an opaque
            auth error deep inside the generator call.
    """
    # Fail fast on a missing token; Secret.from_token(None) would otherwise
    # surface as a confusing downstream error.
    if not HUGGINGFACE_API_KEY:
        raise ValueError("HUGGINGFACE_API_KEY environment variable is not set")

    quiz_generation_pipeline = Pipeline()
    quiz_generation_pipeline.add_component("link_content_fetcher", LinkContentFetcher())
    quiz_generation_pipeline.add_component("html_converter", HTMLToDocument())
    quiz_generation_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=quiz_generation_template))
    quiz_generation_pipeline.add_component(
        "generator",
        HuggingFaceAPIChatGenerator(
            api_type="serverless_inference_api",
            api_params={
                "model": "meta-llama/Llama-3.1-8B-Instruct",
                "provider": "novita",  # important for Inference Providers
            },
            token=Secret.from_token(HUGGINGFACE_API_KEY),
        ),
    )

    # Wire the stages: fetcher -> converter -> prompt builder -> model.
    quiz_generation_pipeline.connect("link_content_fetcher", "html_converter")
    quiz_generation_pipeline.connect("html_converter", "prompt_builder")
    quiz_generation_pipeline.connect("prompt_builder", "generator")
    return quiz_generation_pipeline
| import re | |
| import json | |
def clean_llm_json(s: str) -> str:
    """Best-effort normalization of an LLM reply into parseable JSON.

    Handles three common model quirks: markdown ``` fences around the
    payload, single-quoted pseudo-JSON (a printed Python dict), and
    trailing commas before a closing brace or bracket.
    """
    text = s.strip()

    # Peel off markdown code fences such as ```json ... ```
    if text.startswith("```"):
        text = re.sub(r"^```[a-zA-Z0-9]*\s*", "", text)
        text = re.sub(r"\s*```$", "", text)

    # A dict-ish string quoted only with apostrophes: swap to JSON quoting.
    if text.startswith("{") and "'" in text and '"' not in text:
        text = text.replace("'", '"')

    # JSON disallows trailing commas — strip them before } and ].
    for pattern, repl in ((r",\s*}", "}"), (r",\s*]", "]")):
        text = re.sub(pattern, repl, text)
    return text
def generate_quiz(url):
    """Fetch *url*, run the quiz pipeline, and return the parsed quiz dict.

    Args:
        url: address of the web page to build the quiz from.

    Returns:
        dict: parsed quiz data — a {"questions": [...]} structure as
        described by the prompt template's example JSON.

    Raises:
        ValueError: if the pipeline response has an unexpected shape, the
            reply contains no JSON object, or the JSON fails to parse.
    """
    pipeline = generate_quiz_pipeline()
    try:
        results = pipeline.run({"link_content_fetcher": {"urls": [url]}})
        if "generator" not in results or "replies" not in results["generator"]:
            raise ValueError("Unexpected response structure")
        reply = results["generator"]["replies"][0]
        # ChatMessage.text is the canonical payload field; fall back to str().
        raw_reply = getattr(reply, "text", str(reply))
        # Isolate the outermost {...} span of the reply.
        json_start = raw_reply.find("{")
        json_end = raw_reply.rfind("}")
        # BUG FIX: the original added 1 to rfind's result before comparing to
        # -1, so the "not found" check could never trigger (missing '}' gave
        # json_end == 0, slipping past the guard and slicing an empty string).
        if json_start == -1 or json_end == -1:
            raise ValueError("JSON not found in the reply")
        # Normalize common LLM artifacts (fences, quotes, trailing commas)
        # before parsing — clean_llm_json was previously defined but unused.
        json_reply = clean_llm_json(raw_reply[json_start:json_end + 1])
        quiz_data = json.loads(json_reply)  # Safely parse the JSON string
        return quiz_data
    except Exception as e:
        # Chain the cause so the original traceback stays visible.
        raise ValueError(f"Error generating quiz: {e}") from e