flash_cards / crewai_flashcard.py
JustusI's picture
Update crewai_flashcard.py
e4bc372 verified
import os
from crewai import Agent, Task, Crew
from crewai_tools import PDFSearchTool
import openai
# Read the OpenAI API key from the OPENAI_API_KEY environment variable
# (the value is None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# PDF search tool, created without a preset PDF; it is handed to the
# extractor agent below.
pdf_tool = PDFSearchTool()
# Agent 1: PDF Extractor Agent – extracts the text of the provided PDF.
# The goal no longer mentions a page range: no page range is passed to the
# crew, so the agent should not be told to look for one.
pdf_extractor = Agent(
    role="PDF Extractor",
    goal="Extract the text content from the provided PDF.",
    backstory="You specialize in reading and processing PDFs to return useful study content.",
    verbose=True,
    # The only tool this agent gets is the PDF search tool.
    tools=[pdf_tool],
)
# Agent 2: Flashcard Generator Agent – turns the extracted text into
# question/answer flashcards. No tools are given to this agent.
flashcard_generator = Agent(
    verbose=True,
    role="Flashcard Generator",
    backstory=(
        "You create study flashcards to test understanding, "
        "ensuring each flashcard has a clear question and answer."
    ),
    goal="Generate flashcards with questions and answers based on the extracted text.",
)
# Task 1: Extraction Task – extract the text of the PDF at {pdf_file_path}.
# The only placeholder in the description is {pdf_file_path}; the expected
# output is worded to match (no page range is involved).
extraction_task = Task(
    description=(
        "Extract and return the text content from the PDF located at {pdf_file_path}."
    ),
    expected_output="A string containing the text extracted from the PDF.",
    agent=pdf_extractor,
)
# Task 2: Flashcard Generation Task – generate {flashcard_count} flashcards
# from the text produced by the extraction task.
flashcard_task = Task(
    # Adjacent string literals are concatenated as-is, so each fragment must
    # carry its own trailing space/punctuation.
    description=(
        "Based on the following extracted text, generate {flashcard_count} flashcards. "
        "Each flashcard should have a question and a corresponding answer. "
        "Return the flashcards as a JSON list of objects with keys 'question' and 'answer'."
    ),
    expected_output="A JSON list of flashcards.",
    output_file="flashcards.json",
    agent=flashcard_generator,
    # The extraction task is listed as this task's context.
    context=[extraction_task],
)
# Earlier draft of the two tasks above, left commented out (its extraction prompt mentions page_range):
# # Task 1: Extraction Task – Extract text from pages {page_range} of the PDF.
# extraction_task = Task(
# description=(
# "Extract and return the text content from pages page_range of the PDF located at pdf_file_path."
# ),
# expected_output="A string containing the extracted text from the specified pages.",
# agent=pdf_extractor
# )
# # Task 2: Flashcard Generation Task – Generate {flashcard_count} flashcards from the extracted text.
# flashcard_task = Task(
# description=(
# "Based on the following extracted text:"
# "Generate {flashcard_count} flashcards. Each flashcard should have a question and a corresponding answer. "
# "Return the flashcards as a JSON list of objects with keys 'question' and 'answer'."
# ),
# expected_output="A JSON list of flashcards.",
# agent=flashcard_generator,
# context=[extraction_task]
# )
# Assemble the Crew from both agents; the tasks are listed with extraction
# first and flashcard generation second.
flashcard_crew = Crew(
    tasks=[extraction_task, flashcard_task],
    agents=[pdf_extractor, flashcard_generator],
    verbose=True,
)
def generate_flashcards(pdf_file_path: str, flashcard_count: int) -> str:
    """Run the flashcard crew on a PDF and return the generated flashcards.

    The inputs are validated first so that an obviously invalid request
    fails immediately instead of starting a crew run.

    Args:
        pdf_file_path: Path to the PDF file. Must be non-empty.
        flashcard_count: Number of flashcards to generate. Must be at least 1.
            Values that ``int()`` can convert (e.g. ``5.0`` or ``"5"``) are
            accepted and passed on as an ``int``.

    Returns:
        The ``raw`` attribute of the crew result. The flashcard task asks for
        a JSON list of objects with ``question`` and ``answer`` keys, but the
        text is returned as produced, without being parsed or validated here.

    Raises:
        ValueError: If ``pdf_file_path`` is empty, or if ``flashcard_count``
            cannot be converted to an integer or is smaller than 1.
    """
    if not pdf_file_path:
        raise ValueError("pdf_file_path must be a non-empty path to a PDF file")

    try:
        count = int(flashcard_count)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"flashcard_count must be an integer, got {flashcard_count!r}"
        ) from err
    if count < 1:
        raise ValueError(f"flashcard_count must be at least 1, got {count}")

    # These keys fill the {pdf_file_path} and {flashcard_count} placeholders
    # used in the task descriptions.
    inputs = {
        "pdf_file_path": pdf_file_path,
        "flashcard_count": count,
    }
    results = flashcard_crew.kickoff(inputs=inputs)
    return results.raw