Spaces:

Abdul-Haseeb
/

Dev-Bot

Sleeping

App Files Files Community

Dev-Bot / pipelines.py

Abdul-Haseeb

Update pipelines.py

ff4ed29 verified 2 months ago

raw

history blame contribute delete

4.86 kB

	import requests
	import json
	import streamlit as st
	from haystack import Pipeline
	from haystack.components.converters import HTMLToDocument
	from haystack.components.fetchers import LinkContentFetcher
	#from haystack.components.builders import PromptBuilder
	#from haystack.components.generators import HuggingFaceAPIGenerator
	from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
	from haystack.components.builders import ChatPromptBuilder
	#from haystack.components.builders import DynamicChatPromptBuilder
	from haystack.utils import Secret
	#from dotenv import load_dotenv
	import os

	#load_dotenv() # Load environment variables from a .env file

	# Hugging Face access token read from the environment; None when the variable is unset.
	HUGGINGFACE_API_KEY = os.getenv('HUGGINGFACE_API_KEY')

	# Chat prompt template for quiz generation: a single user message that asks
	# for 4 multiple-choice / true-false questions as JSON and then appends the
	# content of every entry in `documents`, each truncated by the Jinja
	# `truncate(3800)` filter.
	# NOTE: the filter separator must be a bare `|`; a backslash before it would
	# be kept in the string and is not valid Jinja filter syntax.
	quiz_generation_template = """
	{% message role="user" %}
	Given the following text, create only 4 multiple-choice or true-false questions in JSON format randomly.
	The options should be unambiguous.
	For multiple-choice questions, each option should begin with a letter followed by a period and a space (e.g., "a. option").
	For true-false questions, there should be only two options that is ('true','false').
	The question should also briefly mention the general topic of the text so that it can be understood in isolation.
	Include challenging questions that require reasoning.
	Respond with JSON only, no markdown or descriptions.
	Example JSON format you should absolutely follow:
	{
	"questions": [
	{
	"question": "text of the question",
	"options": ["a. 1st option", "b. 2nd option", "c. 3rd option", "d. 4th option"],
	"right_option": "c"
	}
	]
	}
	IMPORTANT: Do not write anything else and stop generating after once!!!
	text:
	{% for doc in documents %}{{ doc.content|truncate(3800) }}{% endfor %}
	{% endmessage %}
	"""


	def generate_quiz_pipeline():
	    """Assemble the URL-to-quiz Haystack pipeline.

	    The pipeline chains four components, in this order:
	    ``link_content_fetcher`` -> ``html_converter`` -> ``prompt_builder``
	    -> ``generator``.

	    Returns:
	        The wired ``Pipeline`` instance, ready to be run with
	        ``{"link_content_fetcher": {"urls": [...]}}``.
	    """
	    pipe = Pipeline()

	    # Stages that fetch the page, turn its HTML into documents and render
	    # the quiz prompt from them.
	    pipe.add_component("link_content_fetcher", LinkContentFetcher())
	    pipe.add_component("html_converter", HTMLToDocument())
	    pipe.add_component(
	        "prompt_builder",
	        ChatPromptBuilder(template=quiz_generation_template),
	    )

	    # Chat model that answers the rendered prompt.
	    chat_generator = HuggingFaceAPIChatGenerator(
	        api_type='serverless_inference_api',
	        api_params={
	            "model": "meta-llama/Llama-3.1-8B-Instruct",
	            "provider": "novita",  # important for Inference Providers
	        },
	        token=Secret.from_token(HUGGINGFACE_API_KEY),
	    )
	    pipe.add_component("generator", chat_generator)

	    # Wire each stage's output into the next stage.
	    wiring = (
	        ("link_content_fetcher", "html_converter"),
	        ("html_converter", "prompt_builder"),
	        ("prompt_builder", "generator"),
	    )
	    for sender, receiver in wiring:
	        pipe.connect(sender, receiver)

	    return pipe

	import re
	import json

	def clean_llm_json(s: str) -> str:
	    """Repair common LLM formatting quirks so the text can be parsed as JSON.

	    The following clean-ups are applied, in order:

	    1. Surrounding whitespace is stripped.
	    2. A Markdown code fence is removed: the opening fence with its optional
	       language tag (such as ``json``) and the closing fence.
	    3. If the text starts with ``{`` and contains single quotes but no
	       double quotes, every single quote is replaced by a double quote.
	    4. Trailing commas directly before ``}`` or ``]`` are removed.

	    Args:
	        s: Raw text returned by the model.

	    Returns:
	        The cleaned text. It is not validated; parsing may still fail.
	    """
	    s = s.strip()

	    # Strip ```json ... ``` fences if present.
	    if s.startswith("```"):
	        # The language tag may be any length (or absent) and may be followed
	        # by any amount of whitespace, hence the `*` quantifiers.
	        s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
	        # Remove the closing fence together with the whitespace before it.
	        s = re.sub(r"\s*```$", "", s)

	    # If it looks like a Python dict with single quotes only, convert to
	    # double quotes. Skipped as soon as any double quote is present so
	    # that apostrophes inside proper JSON strings are left alone.
	    if s.startswith("{") and "'" in s and '"' not in s:
	        s = s.replace("'", '"')

	    # Remove trailing commas before } or ].
	    s = re.sub(r",\s*([}\]])", r"\1", s)

	    return s


	def generate_quiz(url):
	    """Generate a quiz from the web page at *url* and return it as parsed JSON.

	    A fresh pipeline is built with ``generate_quiz_pipeline()`` and run on
	    the URL. The first generator reply is searched for the outermost
	    ``{ ... }`` span, which is then decoded as JSON.

	    Args:
	        url: Address of the page the quiz should be based on.

	    Returns:
	        The object decoded from the JSON found in the model's first reply.

	    Raises:
	        ValueError: If running the pipeline fails, the result lacks the
	            ``generator``/``replies`` entries, or no parseable JSON object
	            is found in the reply. The message is prefixed with
	            ``"Error generating quiz: "`` and the original exception is
	            chained as the cause.
	    """
	    pipeline = generate_quiz_pipeline()
	    try:
	        results = pipeline.run({"link_content_fetcher": {"urls": [url]}})
	        if "generator" not in results or "replies" not in results["generator"]:
	            raise ValueError("Unexpected response structure")

	        reply = results["generator"]["replies"][0]
	        # Prefer the reply's `text` attribute; fall back to str() for
	        # replies that do not have one.
	        raw_reply = getattr(reply, "text", str(reply))
	        if not isinstance(raw_reply, str):
	            # e.g. a `text` attribute that is None: nothing to search.
	            raise ValueError("JSON not found in the reply")

	        # Extract the JSON part of the reply: first "{" to last "}".
	        json_start = raw_reply.find("{")
	        json_end = raw_reply.rfind("}")
	        # find/rfind return -1 when the brace is missing; the comparison
	        # also rejects a closing brace located before the opening one.
	        if json_start == -1 or json_end < json_start:
	            raise ValueError("JSON not found in the reply")
	        json_reply = raw_reply[json_start:json_end + 1]

	        try:
	            return json.loads(json_reply)
	        except json.JSONDecodeError:
	            # Strict parsing failed: retry once after repairing common LLM
	            # quirks (single quotes, trailing commas).
	            return json.loads(clean_llm_json(json_reply))
	    except Exception as e:
	        # Single error type for callers; keep the root cause attached.
	        raise ValueError(f"Error generating quiz: {e}") from e