Spaces:
Sleeping
Sleeping
| import os | |
| from crewai import Agent, Task, Crew | |
| from crewai_tools import PDFSearchTool | |
| import openai | |
# Hand the OpenAI key from the environment to the openai module.
# If OPENAI_API_KEY is unset this assigns None.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# PDF search tool created with no arguments, i.e. not bound to a particular
# PDF at construction time.
pdf_tool = PDFSearchTool()
# Agent 1: PDF Extractor — the only agent that is given the PDF search tool.
pdf_extractor = Agent(
    tools=[pdf_tool],
    verbose=True,
    role="PDF Extractor",
    goal="Extract the text content from the specified page range of the provided PDF.",
    backstory="You specialize in reading and processing PDFs to return useful study content.",
)
# Agent 2: Flashcard Generator — turns extracted text into question/answer
# flashcards. It is configured without any tools.
flashcard_generator = Agent(
    verbose=True,
    role="Flashcard Generator",
    goal="Generate flashcards with questions and answers based on the extracted text.",
    backstory="You create study flashcards to test understanding, ensuring each flashcard has a clear question and answer.",
)
# Task 1: Extraction Task — extract the text of the PDF at {pdf_file_path}.
# NOTE: the description interpolates only {pdf_file_path}; no page range is
# passed in, although expected_output still speaks of "specified pages".
extraction_task = Task(
    agent=pdf_extractor,
    description="Extract and return the text content from the PDF located at {pdf_file_path}.",
    expected_output="A string containing the extracted text from the specified pages.",
)
# Task 2: Flashcard Generation Task — generate {flashcard_count} flashcards
# from the text produced by the extraction task.
flashcard_task = Task(
    description=(
        # Adjacent string literals are joined verbatim, so each fragment must
        # end with its own separator. The first fragment previously had none,
        # which produced "...extracted textGenerate N flashcards" in the prompt.
        "Based on the extracted text provided as context, "
        "generate {flashcard_count} flashcards. "
        "Each flashcard should have a question and a corresponding answer. "
        "Return the flashcards as a JSON list of objects with keys 'question' and 'answer'."
    ),
    expected_output="A JSON list of flashcards.",
    # The task output is also written to this file.
    output_file="flashcards.json",
    agent=flashcard_generator,
    # The extraction task is listed as context so its output is available to
    # this task; the text is not embedded in the description itself.
    context=[extraction_task],
)
| # In crewai_flashcard.py | |
| # # Task 1: Extraction Task — Extract text from pages {page_range} of the PDF. | |
| # extraction_task = Task( | |
| # description=( | |
| # "Extract and return the text content from pages page_range of the PDF located at pdf_file_path." | |
| # ), | |
| # expected_output="A string containing the extracted text from the specified pages.", | |
| # agent=pdf_extractor | |
| # ) | |
| # # Task 2: Flashcard Generation Task — Generate {flashcard_count} flashcards from the extracted text. | |
| # flashcard_task = Task( | |
| # description=( | |
| # "Based on the following extracted text:" | |
| # "Generate {flashcard_count} flashcards. Each flashcard should have a question and a corresponding answer. " | |
| # "Return the flashcards as a JSON list of objects with keys 'question' and 'answer'." | |
| # ), | |
| # expected_output="A JSON list of flashcards.", | |
| # agent=flashcard_generator, | |
| # context=[extraction_task] | |
| # ) | |
# Crew wiring: the extraction task is listed before the flashcard task, and
# each task's agent appears in the agents list.
flashcard_crew = Crew(
    tasks=[extraction_task, flashcard_task],
    agents=[pdf_extractor, flashcard_generator],
    verbose=True,
)
def generate_flashcards(pdf_file_path: str, flashcard_count: int) -> str:
    """Run the flashcard crew on a PDF and return its raw output.

    The two arguments are interpolated into the task descriptions as
    ``{pdf_file_path}`` and ``{flashcard_count}``. No page range is
    supported: the whole extraction is driven by the file path alone.

    Args:
        pdf_file_path: Path to the PDF file to extract text from.
        flashcard_count: Number of flashcards to request; must be at least 1.

    Returns:
        The ``raw`` attribute of the crew result. The flashcard task asks the
        model for a JSON list of ``question``/``answer`` objects, but the
        text is model-generated and is not parsed or validated here.

    Raises:
        ValueError: If ``pdf_file_path`` is empty, or ``flashcard_count`` is
            not a number or is smaller than 1.
    """
    # Reject unusable inputs before any (slow, billable) LLM work starts.
    if not pdf_file_path:
        raise ValueError("pdf_file_path must be a non-empty path to a PDF file")
    try:
        count = int(flashcard_count)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"flashcard_count must be an integer, got {flashcard_count!r}"
        ) from err
    if count < 1:
        raise ValueError(f"flashcard_count must be at least 1, got {count}")

    inputs = {
        "pdf_file_path": pdf_file_path,
        "flashcard_count": count,
    }
    results = flashcard_crew.kickoff(inputs=inputs)
    return results.raw