Dev-Bot / pipelines.py
Abdul-Haseeb's picture
Update pipelines.py
ff4ed29 verified
import requests
import json
import streamlit as st
from haystack import Pipeline
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
#from haystack.components.builders import PromptBuilder
#from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.components.builders import ChatPromptBuilder
#from haystack.components.builders import DynamicChatPromptBuilder
from haystack.utils import Secret
#from dotenv import load_dotenv
import os
#load_dotenv() # Load environment variables from a .env file
# Hugging Face API token, read from the environment once at import time.
# os.getenv returns None when the variable is unset.
HUGGINGFACE_API_KEY = os.getenv('HUGGINGFACE_API_KEY')
# Chat prompt template for quiz generation; it is handed to ChatPromptBuilder
# in generate_quiz_pipeline(). It defines a single user-role message that asks
# for 4 questions as raw JSON (with an example schema) and then appends the
# content of every entry in `documents`, each passed through `truncate(3800)`.
quiz_generation_template = """
{% message role="user" %}
Given the following text, create only 4 multiple-choice or true-false questions in JSON format randomly.
The options should be unambiguous.
For multiple-choice questions, each option should begin with a letter followed by a period and a space (e.g., "a. option").
For true-false questions, there should be only two options that is ('true','false').
The question should also briefly mention the general topic of the text so that it can be understood in isolation.
Include challenging questions that require reasoning.
Respond with JSON only, no markdown or descriptions.
Example JSON format you should absolutely follow:
{
"questions": [
{
"question": "text of the question",
"options": ["a. 1st option", "b. 2nd option", "c. 3rd option", "d. 4th option"],
"right_option": "c"
}
]
}
IMPORTANT: Do not write anything else and stop generating after once!!!
text:
{% for doc in documents %}{{ doc.content|truncate(3800) }}{% endfor %}
{% endmessage %}
"""
def generate_quiz_pipeline():
    """Assemble the Haystack pipeline that turns a web page into a quiz reply.

    Four components are registered and connected in a straight line:
    ``link_content_fetcher`` -> ``html_converter`` -> ``prompt_builder``
    -> ``generator``. The prompt builder renders ``quiz_generation_template``
    and the generator is a ``HuggingFaceAPIChatGenerator`` configured for
    ``meta-llama/Llama-3.1-8B-Instruct`` through the ``novita`` provider.

    Returns:
        Pipeline: the connected pipeline, ready for ``run``.

    Raises:
        ValueError: if the ``HUGGINGFACE_API_KEY`` environment variable was
            unset or empty when the module was imported.
    """
    # os.getenv yields None for an unset variable; fail here with an
    # actionable message instead of handing None to Secret.from_token.
    if not HUGGINGFACE_API_KEY:
        raise ValueError(
            "HUGGINGFACE_API_KEY environment variable is not set; "
            "it is required to call the Hugging Face Inference API."
        )

    api_type = 'serverless_inference_api'
    generator = HuggingFaceAPIChatGenerator(
        api_type=api_type,
        api_params={
            "model": "meta-llama/Llama-3.1-8B-Instruct",
            "provider": "novita",  # important for Inference Providers
        },
        token=Secret.from_token(HUGGINGFACE_API_KEY),
    )

    quiz_generation_pipeline = Pipeline()
    quiz_generation_pipeline.add_component("link_content_fetcher", LinkContentFetcher())
    quiz_generation_pipeline.add_component("html_converter", HTMLToDocument())
    quiz_generation_pipeline.add_component(
        "prompt_builder",
        ChatPromptBuilder(template=quiz_generation_template),
    )
    quiz_generation_pipeline.add_component("generator", generator)

    # Each stage feeds the next one.
    quiz_generation_pipeline.connect("link_content_fetcher", "html_converter")
    quiz_generation_pipeline.connect("html_converter", "prompt_builder")
    quiz_generation_pipeline.connect("prompt_builder", "generator")
    return quiz_generation_pipeline
import re
import json
# Matches either a complete double-quoted string literal (group 1) or a comma
# followed by optional whitespace and a closing brace/bracket (group 2).
# Matching string literals first keeps commas inside them out of reach.
_STRING_OR_TRAILING_COMMA = re.compile(
    r'("(?:\\.|[^"\\])*")|,\s*([}\]])',
    re.DOTALL,
)


def clean_llm_json(s: str) -> str:
    """Repair common formatting slips in JSON text produced by an LLM.

    Applied in order:

    1. Surrounding whitespace is stripped.
    2. If the text starts with a Markdown code fence, the opening fence
       (with optional language tag) and a trailing fence are removed.
    3. If the text starts with ``{`` and contains single quotes but no
       double quotes, every single quote is replaced by a double quote.
       This is a heuristic: an apostrophe inside a value is converted too.
    4. Trailing commas directly before ``}`` or ``]`` are dropped, except
       when they sit inside a double-quoted string literal.

    Args:
        s: raw text that is expected to contain a JSON document.

    Returns:
        The cleaned text. It is not validated; callers still need
        ``json.loads`` to find out whether it parses.
    """
    s = s.strip()

    # Strip ```json ... ``` fences if present.
    if s.startswith("```"):
        s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
        s = re.sub(r"\s*```$", "", s)

    # Python-dict-looking output that uses only single quotes.
    if s.startswith("{") and "'" in s and '"' not in s:
        s = s.replace("'", '"')

    def _keep_string_or_drop_comma(match):
        # Group 1 set: a string literal, returned untouched.
        # Otherwise: a trailing comma, replaced by the bare closing bracket.
        string_literal = match.group(1)
        if string_literal is not None:
            return string_literal
        return match.group(2)

    return _STRING_OR_TRAILING_COMMA.sub(_keep_string_or_drop_comma, s)
def _extract_quiz_json(raw_reply: str) -> dict:
    """Parse the outermost ``{...}`` span of *raw_reply* as JSON."""
    json_start = raw_reply.find("{")
    json_end = raw_reply.rfind("}")
    # Compare the raw indices: adding 1 to rfind's result first would turn
    # the "not found" value -1 into 0 and hide the missing closing brace.
    if json_start == -1 or json_end < json_start:
        raise ValueError("JSON not found in the reply")
    json_reply = raw_reply[json_start:json_end + 1]
    try:
        return json.loads(json_reply)
    except json.JSONDecodeError:
        # Strict parsing failed; retry once after repairing common LLM
        # slips (single quotes, trailing commas).
        return json.loads(clean_llm_json(json_reply))


def generate_quiz(url):
    """Generate quiz data from the web page at *url*.

    Builds a fresh pipeline, runs it with ``url`` as the only entry of the
    fetcher's ``urls`` input, takes the first reply of the ``generator``
    component and parses the JSON object embedded in it.

    Args:
        url: address of the page to build the quiz from.

    Returns:
        The parsed JSON object from the model reply.

    Raises:
        ValueError: if the pipeline run fails, the result lacks a generator
            reply, or no parseable JSON object is found. The original
            exception is attached as ``__cause__``.
    """
    pipeline = generate_quiz_pipeline()
    try:
        results = pipeline.run({"link_content_fetcher": {"urls": [url]}})
        if "generator" not in results or "replies" not in results["generator"]:
            raise ValueError("Unexpected response structure")

        replies = results["generator"]["replies"]
        if not replies:
            raise ValueError("Unexpected response structure")

        reply = replies[0]
        # Prefer the reply's `text` attribute; fall back to str() for
        # replies that do not have one.
        raw_reply = getattr(reply, "text", str(reply))
        # Guard against a `text` attribute that is present but None.
        if raw_reply is None:
            raise ValueError("JSON not found in the reply")

        return _extract_quiz_json(raw_reply)
    except Exception as e:
        raise ValueError(f"Error generating quiz: {e}") from e