import fitz
from docx import Document
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from typing import Type


class CVParserInput(BaseModel):
    """Argument schema for ``CVParserTool``: the path of the CV to read."""

    file_path: str = Field(description="Absolute path to the CV file (.pdf or .docx)")


class CVParserTool(BaseTool):
    """Tool that extracts the plain text of a CV stored as PDF or DOCX.

    Failures are reported as a returned message string rather than a raised
    exception, so the caller always receives a ``str``.
    """

    name: str = "CV Parser"
    description: str = "Reads and extracts full text from a CV in PDF or DOCX format."
    args_schema: Type[BaseModel] = CVParserInput

    def _run(self, file_path: str) -> str:
        """Return the text of the CV at *file_path*.

        The parser is chosen from the file extension, compared
        case-insensitively so that names such as ``resume.PDF`` are accepted.

        Args:
            file_path: Path to a ``.pdf`` or ``.docx`` file.

        Returns:
            The extracted text with surrounding whitespace stripped, or a
            human-readable message when the extension is unsupported or the
            file cannot be read.
        """
        # Only the comparison uses the lowered name; the original path is
        # what gets opened, so case-sensitive filesystems keep working.
        lowered = file_path.lower()
        try:
            if lowered.endswith(".pdf"):
                return self._parse_pdf(file_path)
            if lowered.endswith(".docx"):
                return self._parse_docx(file_path)
            return "Unsupported file format. Only PDF and DOCX supported."
        except Exception as e:
            # Deliberate catch-all: any read/parse failure is turned into a
            # message for the caller instead of propagating.
            return f"Error reading file: {str(e)}"

    def _parse_pdf(self, path: str) -> str:
        """Return the concatenated text of every page of the PDF at *path*."""
        # The context manager closes the document even if extraction fails.
        with fitz.open(path) as doc:
            text = "".join(page.get_text() for page in doc)
        return text.strip()

    def _parse_docx(self, path: str) -> str:
        """Return the paragraphs of the DOCX at *path*, one per line.

        Only ``doc.paragraphs`` is read here.
        NOTE(review): text held in tables is presumably not included —
        confirm whether CVs laid out with tables need separate handling.
        """
        doc = Document(path)
        text = "\n".join(para.text for para in doc.paragraphs)
        return text.strip()