Spaces:

mohhhhhit
/

notebook-backend

Sleeping

App Files Files Community

notebook-backend / utils /studio_generator.py

mohhhhhit

first init

3736c33 verified about 2 months ago

raw

history blame contribute delete

10.8 kB

	"""
	Studio Generator - Uses LLM to generate flashcards and quiz questions
	"""
	from typing import List, Optional
	import json
	import re

	from models.studio_models import (
	Flashcard, FlashcardCreate, FlashcardGenerateRequest,
	Quiz, QuizQuestion, QuizGenerateRequest,
	DifficultyLevel, QuestionType
	)
	from utils.llm_generator import LLMGenerator
	from utils.studio_manager import StudioManager


	class StudioGenerator:
	    """Generate flashcards and quizzes from user content using an LLM.

	    The generator gathers source text, builds a prompt, sends it to the
	    injected LLM generator, parses the JSON the model returns, and persists
	    the resulting items through the injected studio manager.
	    """

	    # Maximum number of source characters embedded in a prompt.
	    MAX_CONTENT_CHARS = 3000
	    # Token budgets passed to the LLM for each kind of generation.
	    FLASHCARD_MAX_TOKENS = 2000
	    QUIZ_MAX_TOKENS = 3000
	    # Greedy match from the first '[' to the last ']' in the response.
	    _JSON_ARRAY_PATTERN = re.compile(r'\[[\s\S]*\]')

	    def __init__(self, llm_generator: LLMGenerator, studio_manager: StudioManager):
	        """Store the LLM client and the storage manager used by all methods."""
	        self.llm = llm_generator
	        self.studio = studio_manager

	    async def generate_flashcards(self, request: FlashcardGenerateRequest) -> List[Flashcard]:
	        """Generate flashcards for ``request`` and save them.

	        Returns:
	            The objects returned by ``self.studio.create_flashcard`` for each
	            parsed card, or an empty list when there is no source content,
	            the LLM returns nothing, or nothing could be parsed.
	        """
	        content = await self._gather_content(
	            request.space_id,
	            request.source_type,
	            request.source_ids,
	            request.text_content,
	        )
	        if not content:
	            return []

	        prompt = self._create_flashcard_prompt(content, request.num_cards, request.difficulty)
	        response = await self.llm.generate(prompt, max_tokens=self.FLASHCARD_MAX_TOKENS)
	        if not response:
	            return []

	        flashcards = self._parse_flashcards(
	            response,
	            request.space_id,
	            request.source_type,
	            request.source_ids,
	            request.difficulty,
	        )

	        # Persist each parsed card and hand back what the manager returns.
	        return [self.studio.create_flashcard(card_data) for card_data in flashcards]

	    async def generate_quiz(self, request: QuizGenerateRequest) -> Optional[Quiz]:
	        """Generate a quiz for ``request`` and save it.

	        Returns:
	            The object returned by ``self.studio.create_quiz``, or ``None``
	            when there is no source content, the LLM returns nothing, or no
	            question could be parsed from the response.
	        """
	        content = await self._gather_content(
	            request.space_id,
	            request.source_type,
	            request.source_ids,
	            request.text_content,
	        )
	        if not content:
	            return None

	        prompt = self._create_quiz_prompt(
	            content,
	            request.num_questions,
	            request.question_types,
	            request.difficulty,
	        )
	        response = await self.llm.generate(prompt, max_tokens=self.QUIZ_MAX_TOKENS)
	        if not response:
	            return None

	        questions = self._parse_quiz_questions(response, request.question_types, request.difficulty)
	        if not questions:
	            return None

	        # Kept as a function-scope import, exactly as in the original code.
	        from models.studio_models import QuizCreate

	        quiz_data = QuizCreate(
	            space_id=request.space_id,
	            title=request.title,
	            description=f"Generated quiz with {len(questions)} questions",
	            questions=questions,
	            source_type=request.source_type,
	            source_ids=request.source_ids,
	        )
	        return self.studio.create_quiz(quiz_data)

	    async def _gather_content(
	        self,
	        space_id: str,
	        source_type: str,
	        source_ids: Optional[List[str]],
	        text_content: Optional[str]
	    ) -> str:
	        """Collect the text that the LLM prompt will be built from.

	        Args:
	            space_id: Accepted for interface symmetry; not used here.
	            source_type: ``"notebook"`` or ``"file"``; other values yield "".
	            source_ids: Identifiers of the notebook entries or files to read.
	            text_content: Explicit text; when non-empty it is returned as-is
	                and the other arguments are ignored.

	        Returns:
	            The gathered text, with multiple parts separated by a ``---``
	            rule, or an empty string when nothing was found.
	        """
	        if text_content:
	            return text_content

	        content_parts = []

	        if source_type == "notebook" and source_ids:
	            for entry_id in source_ids:
	                entry = self.studio.get_notebook_entry(entry_id)
	                if entry:
	                    content_parts.append(f"# {entry.title}\n\n{entry.content}")
	        elif source_type == "file" and source_ids:
	            # TODO: Integrate with file retriever to get file content.
	            # NOTE(review): until then this placeholder is what gets sent to
	            # the LLM for file sources.
	            content_parts.append("File content retrieval not yet implemented")

	        return "\n\n---\n\n".join(content_parts)

	    def _create_flashcard_prompt(self, content: str, num_cards: int, difficulty: DifficultyLevel) -> str:
	        """Build the flashcard-generation prompt.

	        Only the first ``MAX_CONTENT_CHARS`` characters of ``content`` are
	        embedded. Raises ``KeyError`` for a difficulty that is not EASY,
	        MEDIUM or HARD.
	        """
	        difficulty_desc = {
	            DifficultyLevel.EASY: "basic concepts and definitions",
	            DifficultyLevel.MEDIUM: "key concepts and applications",
	            DifficultyLevel.HARD: "advanced concepts and critical thinking",
	        }
	        focus = difficulty_desc[difficulty]

	        # Assembled line by line so the prompt text does not depend on how
	        # this source file is indented. The stray "# Limit content length"
	        # text that used to leak into the prompt is intentionally gone.
	        lines = [
	            f"Based on the following content, create {num_cards} flashcards focusing on {focus}.",
	            "",
	            "Content:",
	            content[:self.MAX_CONTENT_CHARS],
	            "",
	            "Format your response as a JSON array of flashcards, where each flashcard has:",
	            '- "question": The question or prompt (front of card)',
	            '- "answer": The answer or explanation (back of card)',
	            "",
	            "Example format:",
	            "[",
	            '{"question": "What is...", "answer": "It is..."},',
	            '{"question": "How does...", "answer": "It works by..."}',
	            "]",
	            "",
	            f"Generate exactly {num_cards} flashcards:",
	        ]
	        return "\n".join(lines)

	    def _create_quiz_prompt(
	        self,
	        content: str,
	        num_questions: int,
	        question_types: List[QuestionType],
	        difficulty: DifficultyLevel
	    ) -> str:
	        """Build the quiz-generation prompt.

	        Only the first ``MAX_CONTENT_CHARS`` characters of ``content`` are
	        embedded. The ``.value`` of each question type and of ``difficulty``
	        is written into the prompt.
	        """
	        types_str = ", ".join(qt.value for qt in question_types)

	        # Assembled line by line; see _create_flashcard_prompt for why.
	        lines = [
	            f"Based on the following content, create a quiz with {num_questions} questions.",
	            "",
	            "Content:",
	            content[:self.MAX_CONTENT_CHARS],
	            "",
	            f"Question types to include: {types_str}",
	            f"Difficulty level: {difficulty.value}",
	            "",
	            "Format your response as a JSON array of questions, where each question has:",
	            '- "question": The question text',
	            f'- "type": One of: {types_str}',
	            '- "options": Array of 4 options (for multiple_choice only)',
	            '- "correct_answer": The correct answer',
	            '- "explanation": Brief explanation of why this is correct',
	            "",
	            "Example format:",
	            "[",
	            "{",
	            '"question": "What is...",',
	            '"type": "multiple_choice",',
	            '"options": ["Option A", "Option B", "Option C", "Option D"],',
	            '"correct_answer": "Option A",',
	            '"explanation": "This is correct because..."',
	            "},",
	            "{",
	            '"question": "True or False: ...",',
	            '"type": "true_false",',
	            '"options": ["True", "False"],',
	            '"correct_answer": "True",',
	            '"explanation": "This is true because..."',
	            "}",
	            "]",
	            "",
	            f"Generate exactly {num_questions} questions:",
	        ]
	        return "\n".join(lines)

	    @classmethod
	    def _extract_json_array(cls, response: str) -> Optional[list]:
	        """Decode the span from the first '[' to the last ']' in ``response``.

	        Returns ``None`` when the response contains no such span. Raises
	        ``json.JSONDecodeError`` when the span is not valid JSON.
	        """
	        match = cls._JSON_ARRAY_PATTERN.search(response)
	        if match is None:
	            return None
	        return json.loads(match.group(0))

	    @staticmethod
	    def _parse_qa_pairs(response: str) -> List[tuple]:
	        """Extract ``(question, answer)`` pairs from plain ``Q:`` / ``A:`` lines.

	        A line starting with ``Q:`` or ``Question:`` opens a question; the
	        next line starting with ``A:`` or ``Answer:`` closes it. Answers with
	        no pending (non-empty) question are ignored.
	        """
	        pairs = []
	        pending_question = None

	        for raw_line in response.split('\n'):
	            line = raw_line.strip()
	            if line.startswith(('Q:', 'Question:')):
	                pending_question = line.split(':', 1)[1].strip()
	            elif line.startswith(('A:', 'Answer:')) and pending_question:
	                pairs.append((pending_question, line.split(':', 1)[1].strip()))
	                pending_question = None

	        return pairs

	    def _parse_flashcards(
	        self,
	        response: str,
	        space_id: str,
	        source_type: str,
	        source_ids: Optional[List[str]],
	        difficulty: DifficultyLevel
	    ) -> List[FlashcardCreate]:
	        """Parse flashcards from an LLM response.

	        A JSON array of ``{"question", "answer"}`` objects is tried first.
	        When that yields no cards (no array present, invalid JSON, or no
	        usable items) the response is re-read as plain ``Q:`` / ``A:`` lines.
	        Only the first entry of ``source_ids`` is recorded on each card.
	        """
	        source_id = source_ids[0] if source_ids else None

	        def build(question, answer):
	            return FlashcardCreate(
	                space_id=space_id,
	                question=question,
	                answer=answer,
	                difficulty=difficulty,
	                source_type=source_type,
	                source_id=source_id,
	            )

	        flashcards = []

	        try:
	            for card_data in self._extract_json_array(response) or []:
	                # Skip anything that is not an object with both fields, so
	                # one malformed item cannot discard the remaining cards.
	                if not isinstance(card_data, dict):
	                    continue
	                if 'question' in card_data and 'answer' in card_data:
	                    flashcards.append(build(card_data['question'], card_data['answer']))
	        except Exception as e:
	            # Best effort: report the problem and try the plain-text format.
	            print(f"Error parsing flashcards: {e}")

	        if not flashcards:
	            # Previously this fallback only ran after an exception, so a
	            # response with no JSON array at all never reached it.
	            flashcards = [build(question, answer) for question, answer in self._parse_qa_pairs(response)]

	        return flashcards

	    def _parse_quiz_questions(
	        self,
	        response: str,
	        question_types: List[QuestionType],
	        difficulty: DifficultyLevel
	    ) -> List[QuizQuestion]:
	        """Parse quiz questions from an LLM response.

	        Expects a JSON array of question objects. A missing or unrecognised
	        ``"type"`` falls back to multiple choice. ``question_types`` is
	        accepted for interface compatibility and is not used for filtering.
	        Returns the questions parsed before any error; errors are printed.
	        """
	        import uuid  # function-scope: the module does not import uuid

	        questions = []

	        try:
	            for q_data in self._extract_json_array(response) or []:
	                # Non-object items cannot describe a question; skip them
	                # rather than abort parsing of the remaining items.
	                if not isinstance(q_data, dict):
	                    continue

	                try:
	                    q_type = QuestionType(q_data['type'])
	                except (KeyError, ValueError):
	                    q_type = QuestionType.MULTIPLE_CHOICE

	                questions.append(QuizQuestion(
	                    id=str(uuid.uuid4()),
	                    question=q_data.get('question', ''),
	                    type=q_type,
	                    options=q_data.get('options'),
	                    correct_answer=q_data.get('correct_answer', ''),
	                    explanation=q_data.get('explanation'),
	                    points=1,
	                    difficulty=difficulty,
	                ))
	        except Exception as e:
	            print(f"Error parsing quiz questions: {e}")

	        return questions