File size: 4,860 Bytes
a27222e
 
 
 
 
 
40030ed
 
e897e48
40030ed
a27222e
 
6f0ec91
a27222e
 
6f0ec91
a27222e
 
 
 
633fde0
 
 
a27222e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633fde0
a27222e
aaced9c
a27222e
 
 
 
 
 
 
40030ed
f45a3af
a27222e
 
ee9347e
52c61c2
a27222e
 
f45a3af
40030ed
 
 
 
 
 
 
 
 
 
 
a27222e
 
 
 
 
 
ff4ed29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a27222e
 
 
 
 
40030ed
 
ff4ed29
40030ed
a27222e
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import requests
import json
import streamlit as st
from haystack import Pipeline
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
#from haystack.components.builders import PromptBuilder
#from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.components.builders import ChatPromptBuilder
#from haystack.components.builders import DynamicChatPromptBuilder
from haystack.utils import Secret
#from dotenv import load_dotenv
import os

# Read the Hugging Face API token from the environment.
# (dotenv loading is disabled; the variable must be set by the host, e.g.
# Streamlit secrets or the shell. May be None if unset.)
HUGGINGFACE_API_KEY = os.environ.get('HUGGINGFACE_API_KEY')

# Chat-prompt template (Jinja2, Haystack ChatPromptBuilder syntax) that asks the
# model for exactly 4 multiple-choice/true-false questions as raw JSON.
# The {% message role="user" %} block becomes a single user ChatMessage; the
# fetched documents are concatenated in, each truncated to 3800 characters.
quiz_generation_template = """
{% message role="user" %}
Given the following text, create only 4 multiple-choice or true-false questions in JSON format randomly.
The options should be unambiguous.
For multiple-choice questions, each option should begin with a letter followed by a period and a space (e.g., "a. option").
For true-false questions, there should be only two options that is ('true','false').
The question should also briefly mention the general topic of the text so that it can be understood in isolation.
Include challenging questions that require reasoning.
Respond with JSON only, no markdown or descriptions.
Example JSON format you should absolutely follow:
{
    "questions": [
        {
            "question": "text of the question",
            "options": ["a. 1st option", "b. 2nd option", "c. 3rd option", "d. 4th option"],
            "right_option": "c"
        }
    ]
}
IMPORTANT: Do not write anything else and stop generating after once!!!
text:
{% for doc in documents %}{{ doc.content|truncate(3800) }}{% endfor %}
{% endmessage %}
"""


def generate_quiz_pipeline():
    """Build and return a Haystack pipeline that turns a URL into quiz JSON.

    Stages: fetch the page (LinkContentFetcher) -> convert HTML to Documents
    (HTMLToDocument) -> render the chat prompt (ChatPromptBuilder with
    ``quiz_generation_template``) -> generate with a hosted Llama model
    (HuggingFaceAPIChatGenerator).

    Returns:
        Pipeline: a connected, ready-to-run Haystack pipeline whose entry
        input is ``{"link_content_fetcher": {"urls": [...]}}``.
    """
    api_type = 'serverless_inference_api'
    quiz_generation_pipeline = Pipeline()
    quiz_generation_pipeline.add_component("link_content_fetcher", LinkContentFetcher())
    quiz_generation_pipeline.add_component("html_converter", HTMLToDocument())
    quiz_generation_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=quiz_generation_template))
    quiz_generation_pipeline.add_component(
        "generator",
        HuggingFaceAPIChatGenerator(
            api_type=api_type,
            api_params={
                "model": "meta-llama/Llama-3.1-8B-Instruct",
                # Routes the request through a specific HF Inference Provider;
                # required for models served via Inference Providers.
                "provider": "novita",
            },
            # NOTE(review): Secret.from_token(None) will fail if the env var
            # is unset — confirm HUGGINGFACE_API_KEY is configured at deploy.
            token=Secret.from_token(HUGGINGFACE_API_KEY),
        ),
    )
    quiz_generation_pipeline.connect("link_content_fetcher", "html_converter")
    quiz_generation_pipeline.connect("html_converter", "prompt_builder")
    quiz_generation_pipeline.connect("prompt_builder", "generator")

    return quiz_generation_pipeline

import re


def clean_llm_json(s: str) -> str:
    """Normalize a raw LLM reply so it is more likely to parse as JSON.

    Applied repairs, in order:
      1. strip surrounding whitespace;
      2. remove Markdown code fences (```json ... ```), if present;
      3. if the text looks like a Python dict (single quotes, no double
         quotes at all), swap quotes so ``json.loads`` can read it;
      4. drop trailing commas before ``}`` or ``]``.

    Args:
        s: the raw reply text from the model.

    Returns:
        The cleaned string (still not guaranteed to be valid JSON).
    """
    s = s.strip()

    # Strip ```json ... ``` fences if present.
    if s.startswith("```"):
        # Remove the leading ```lang line.
        s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
        # Remove the trailing ``` fence.
        s = re.sub(r"\s*```$", "", s)

    # Only convert quotes when there are NO double quotes at all, so we never
    # corrupt apostrophes inside otherwise-valid JSON strings.
    if s.startswith("{") and "'" in s and '"' not in s:
        s = s.replace("'", '"')

    # Remove trailing commas before } or ] (invalid in strict JSON).
    s = re.sub(r",\s*}", "}", s)
    s = re.sub(r",\s*]", "]", s)

    return s


def generate_quiz(url):
    """Run the quiz pipeline for *url* and return the parsed quiz dict.

    Args:
        url: web page to fetch and build the quiz from.

    Returns:
        dict: the parsed quiz JSON (expected shape: ``{"questions": [...]}``).

    Raises:
        ValueError: if the pipeline reply has an unexpected structure or does
            not contain parseable JSON (the original exception is chained).
    """
    pipeline = generate_quiz_pipeline()
    try:
        results = pipeline.run({"link_content_fetcher": {"urls": [url]}})
        if "generator" not in results or "replies" not in results["generator"]:
            raise ValueError("Unexpected response structure")
        reply = results["generator"]["replies"][0]
        # ChatMessage.text is the canonical content field; fall back to str()
        # for plain-string replies.
        raw_reply = getattr(reply, "text", str(reply))

        # Extract the outermost {...} span of the reply.
        json_start = raw_reply.find("{")
        json_end = raw_reply.rfind("}")
        # BUG FIX: rfind returns -1 on failure; the old code added 1 before
        # comparing against -1, so its "not found" check could never fire and
        # a brace-less reply produced an empty slice instead of this error.
        if json_start == -1 or json_end == -1:
            raise ValueError("JSON not found in the reply")
        quiz_data = json.loads(raw_reply[json_start:json_end + 1])
        return quiz_data
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Error generating quiz: {e}") from e