Spaces:
Sleeping
Sleeping
| """ | |
| Studio Generator - Uses LLM to generate flashcards and quiz questions | |
| """ | |
| from typing import List, Optional | |
| import json | |
| import re | |
| from models.studio_models import ( | |
| Flashcard, FlashcardCreate, FlashcardGenerateRequest, | |
| Quiz, QuizQuestion, QuizGenerateRequest, | |
| DifficultyLevel, QuestionType | |
| ) | |
| from utils.llm_generator import LLMGenerator | |
| from utils.studio_manager import StudioManager | |
class StudioGenerator:
    """Generate flashcards and quizzes using LLM.

    The generator gathers source text, builds a prompt, sends it to the
    injected LLM generator, parses the JSON found in the reply and stores
    the result through the injected studio manager.
    """

    # Maximum number of source characters embedded in a single prompt.
    MAX_CONTENT_CHARS = 3000

    def __init__(self, llm_generator: LLMGenerator, studio_manager: StudioManager):
        """Store the collaborators used for generation and persistence.

        Args:
            llm_generator: Object whose awaitable ``generate(prompt,
                max_tokens=...)`` returns the model's text reply.
            studio_manager: Object used to read notebook entries and to
                create flashcards and quizzes.
        """
        self.llm = llm_generator
        self.studio = studio_manager

    async def generate_flashcards(self, request: FlashcardGenerateRequest) -> List[Flashcard]:
        """Generate flashcards from content using LLM.

        Args:
            request: Describes the space, the content source, the number of
                cards and the difficulty.

        Returns:
            The cards returned by ``studio.create_flashcard`` for every
            parsed card, or an empty list when there is no source content
            or the LLM returns nothing.
        """
        content = await self._gather_content(
            request.space_id,
            request.source_type,
            request.source_ids,
            request.text_content,
        )
        if not content:
            return []

        prompt = self._create_flashcard_prompt(content, request.num_cards, request.difficulty)
        response = await self.llm.generate(prompt, max_tokens=2000)
        if not response:
            return []

        flashcards = self._parse_flashcards(
            response,
            request.space_id,
            request.source_type,
            request.source_ids,
            request.difficulty,
        )
        # Persist each parsed card and hand back the stored versions.
        return [self.studio.create_flashcard(card_data) for card_data in flashcards]

    async def generate_quiz(self, request: QuizGenerateRequest) -> Optional[Quiz]:
        """Generate a quiz from content using LLM.

        Args:
            request: Describes the space, the content source, the title,
                the number and types of questions and the difficulty.

        Returns:
            The quiz returned by ``studio.create_quiz``, or ``None`` when
            there is no source content, the LLM returns nothing, or no
            question could be parsed from the reply.
        """
        content = await self._gather_content(
            request.space_id,
            request.source_type,
            request.source_ids,
            request.text_content,
        )
        if not content:
            return None

        prompt = self._create_quiz_prompt(
            content,
            request.num_questions,
            request.question_types,
            request.difficulty,
        )
        response = await self.llm.generate(prompt, max_tokens=3000)
        if not response:
            return None

        questions = self._parse_quiz_questions(response, request.question_types, request.difficulty)
        if not questions:
            return None

        # Kept as a function-local import, exactly as in the original code.
        from models.studio_models import QuizCreate

        quiz_data = QuizCreate(
            space_id=request.space_id,
            title=request.title,
            description=f"Generated quiz with {len(questions)} questions",
            questions=questions,
            source_type=request.source_type,
            source_ids=request.source_ids,
        )
        return self.studio.create_quiz(quiz_data)

    async def _gather_content(
        self,
        space_id: str,
        source_type: str,
        source_ids: Optional[List[str]],
        text_content: Optional[str]
    ) -> str:
        """Gather content from various sources.

        Args:
            space_id: Accepted for symmetry with the request; not used here.
            source_type: ``"notebook"`` or ``"file"``; anything else yields
                an empty string.
            source_ids: Identifiers of the notebook entries or files.
            text_content: Literal text; when non-empty it is returned as is
                and the other arguments are ignored.

        Returns:
            The gathered text, with multiple parts separated by a
            horizontal-rule line, or ``""`` when nothing was found.
        """
        if text_content:
            return text_content

        content_parts = []
        if source_type == "notebook" and source_ids:
            for entry_id in source_ids:
                entry = self.studio.get_notebook_entry(entry_id)
                if entry:
                    content_parts.append(f"# {entry.title}\n\n{entry.content}")
        elif source_type == "file" and source_ids:
            # TODO: Integrate with file retriever to get file content.
            # NOTE(review): this placeholder is non-empty, so callers will
            # send it to the LLM as if it were real source material.
            content_parts.append("File content retrieval not yet implemented")

        return "\n\n---\n\n".join(content_parts)

    def _create_flashcard_prompt(self, content: str, num_cards: int, difficulty: DifficultyLevel) -> str:
        """Create prompt for flashcard generation.

        Args:
            content: Source text; only the first ``MAX_CONTENT_CHARS``
                characters are embedded in the prompt.
            num_cards: Number of flashcards to request.
            difficulty: Selects the focus description used in the prompt.

        Returns:
            The prompt text asking for a JSON array of question/answer pairs.
        """
        difficulty_desc = {
            DifficultyLevel.EASY: "basic concepts and definitions",
            DifficultyLevel.MEDIUM: "key concepts and applications",
            DifficultyLevel.HARD: "advanced concepts and critical thinking",
        }
        # Truncate outside the template: a '#' comment written inside the
        # f-string would become part of the prompt text.
        excerpt = content[:self.MAX_CONTENT_CHARS]
        prompt = f"""Based on the following content, create {num_cards} flashcards focusing on {difficulty_desc[difficulty]}.
Content:
{excerpt}
Format your response as a JSON array of flashcards, where each flashcard has:
- "question": The question or prompt (front of card)
- "answer": The answer or explanation (back of card)
Example format:
[
{{"question": "What is...", "answer": "It is..."}},
{{"question": "How does...", "answer": "It works by..."}}
]
Generate exactly {num_cards} flashcards:"""
        return prompt

    def _create_quiz_prompt(
        self,
        content: str,
        num_questions: int,
        question_types: List[QuestionType],
        difficulty: DifficultyLevel
    ) -> str:
        """Create prompt for quiz generation.

        Args:
            content: Source text; only the first ``MAX_CONTENT_CHARS``
                characters are embedded in the prompt.
            num_questions: Number of questions to request.
            question_types: Allowed question types; each item's ``value``
                is listed in the prompt.
            difficulty: Its ``value`` is stated in the prompt.

        Returns:
            The prompt text asking for a JSON array of question objects.
        """
        types_str = ", ".join(qt.value for qt in question_types)
        # Truncate outside the template: a '#' comment written inside the
        # f-string would become part of the prompt text.
        excerpt = content[:self.MAX_CONTENT_CHARS]
        prompt = f"""Based on the following content, create a quiz with {num_questions} questions.
Content:
{excerpt}
Question types to include: {types_str}
Difficulty level: {difficulty.value}
Format your response as a JSON array of questions, where each question has:
- "question": The question text
- "type": One of: {types_str}
- "options": Array of 4 options (for multiple_choice only)
- "correct_answer": The correct answer
- "explanation": Brief explanation of why this is correct
Example format:
[
{{
"question": "What is...",
"type": "multiple_choice",
"options": ["Option A", "Option B", "Option C", "Option D"],
"correct_answer": "Option A",
"explanation": "This is correct because..."
}},
{{
"question": "True or False: ...",
"type": "true_false",
"options": ["True", "False"],
"correct_answer": "True",
"explanation": "This is true because..."
}}
]
Generate exactly {num_questions} questions:"""
        return prompt

    @staticmethod
    def _extract_json_array(text: str) -> Optional[list]:
        """Return the first JSON array of objects embedded in *text*.

        Every ``[`` is tried in order as the start of a JSON document, so
        brackets in surrounding prose or markdown do not break extraction.
        Arrays that contain no JSON object (for example a nested list of
        option strings) are skipped.

        Args:
            text: Raw LLM reply.

        Returns:
            The decoded list, or ``None`` when no such array is found.
        """
        decoder = json.JSONDecoder()
        start = text.find("[")
        while start != -1:
            try:
                value, _end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                value = None
            if isinstance(value, list) and any(isinstance(item, dict) for item in value):
                return value
            start = text.find("[", start + 1)
        return None

    def _parse_flashcards(
        self,
        response: str,
        space_id: str,
        source_type: str,
        source_ids: Optional[List[str]],
        difficulty: DifficultyLevel
    ) -> List[FlashcardCreate]:
        """Parse flashcards from LLM response.

        JSON is tried first. When it yields no card, the reply is scanned
        for ``Q:``/``Question:`` lines followed by ``A:``/``Answer:`` lines.

        Args:
            response: Raw LLM reply.
            space_id: Space the cards belong to.
            source_type: Source type recorded on each card.
            source_ids: Source identifiers; only the first one is recorded.
            difficulty: Difficulty recorded on each card.

        Returns:
            The cards that could be built; possibly empty.
        """
        source_id = source_ids[0] if source_ids else None
        flashcards: List[FlashcardCreate] = []

        def add_card(question, answer) -> None:
            # The model's validation error type is not visible here, so a
            # broad catch keeps one bad card from discarding the others.
            try:
                flashcards.append(FlashcardCreate(
                    space_id=space_id,
                    question=question,
                    answer=answer,
                    difficulty=difficulty,
                    source_type=source_type,
                    source_id=source_id,
                ))
            except Exception as e:
                print(f"Error parsing flashcards: {e}")

        for card_data in self._extract_json_array(response) or []:
            if isinstance(card_data, dict) and 'question' in card_data and 'answer' in card_data:
                add_card(card_data['question'], card_data['answer'])
        if flashcards:
            return flashcards

        # Fallback: Try to parse as simple Q&A pairs. This runs whenever the
        # JSON path produced nothing, not only when it raised.
        current_question = None
        for raw_line in response.split('\n'):
            line = raw_line.strip()
            if line.startswith(('Q:', 'Question:')):
                current_question = line.partition(':')[2].strip()
            elif line.startswith(('A:', 'Answer:')) and current_question:
                add_card(current_question, line.partition(':')[2].strip())
                current_question = None
        return flashcards

    def _parse_quiz_questions(
        self,
        response: str,
        question_types: List[QuestionType],
        difficulty: DifficultyLevel
    ) -> List[QuizQuestion]:
        """Parse quiz questions from LLM response.

        Args:
            response: Raw LLM reply.
            question_types: Requested types; currently not used for parsing.
            difficulty: Difficulty recorded on each question.

        Returns:
            The questions that could be built; possibly empty. A missing or
            unrecognised ``type`` falls back to multiple choice.
        """
        import uuid  # local import, hoisted out of the per-question loop

        questions: List[QuizQuestion] = []
        for q_data in self._extract_json_array(response) or []:
            if not isinstance(q_data, dict):
                continue
            # The model's validation error type is not visible here, so a
            # broad catch keeps one bad question from discarding the others.
            try:
                try:
                    q_type = QuestionType(q_data.get('type'))
                except ValueError:
                    q_type = QuestionType.MULTIPLE_CHOICE
                questions.append(QuizQuestion(
                    id=str(uuid.uuid4()),
                    question=q_data.get('question', ''),
                    type=q_type,
                    options=q_data.get('options'),
                    correct_answer=q_data.get('correct_answer', ''),
                    explanation=q_data.get('explanation'),
                    points=1,
                    difficulty=difficulty,
                ))
            except Exception as e:
                print(f"Error parsing quiz questions: {e}")
        return questions