| import fitz | |
| from docx import Document | |
| from crewai.tools import BaseTool | |
| from pydantic import BaseModel, Field | |
| from typing import Type | |
class CVParserInput(BaseModel):
    """Argument schema for the CV parser tool: a single path to the CV file."""

    # Path of the document to read; the description below is surfaced as
    # field metadata on the schema.
    file_path: str = Field(
        description="Absolute path to the CV file (.pdf or .docx)",
    )
class CVParserTool(BaseTool):
    """Tool that extracts the plain text of a CV stored as a PDF or DOCX file.

    Problems (unsupported extension, unreadable file) are reported through
    the returned string instead of being raised, so ``_run`` always hands a
    ``str`` back to its caller.
    """

    name: str = "CV Parser"
    description: str = "Reads and extracts full text from a CV in PDF or DOCX format."
    args_schema: Type[BaseModel] = CVParserInput

    def _run(self, file_path: str) -> str:
        """Return the text of the CV at *file_path*, or a message on failure.

        Args:
            file_path: Path to a ``.pdf`` or ``.docx`` file. The extension is
                matched case-insensitively.

        Returns:
            The extracted text with surrounding whitespace stripped; or
            ``"Unsupported file format. Only PDF and DOCX supported."`` for
            any other extension; or ``"Error reading file: <details>"`` if
            anything raises while reading.
        """
        try:
            # Only the extension check is case-folded, so "CV.PDF" and
            # "Resume.Docx" are accepted; the path itself is opened unchanged.
            lowered = file_path.lower()
            if lowered.endswith(".pdf"):
                return self._parse_pdf(file_path)
            if lowered.endswith(".docx"):
                return self._parse_docx(file_path)
            return "Unsupported file format. Only PDF and DOCX supported."
        except Exception as e:
            # Deliberate catch-all at the tool boundary: the failure is
            # reported as text rather than propagated to the caller.
            return f"Error reading file: {str(e)}"

    def _parse_pdf(self, path: str) -> str:
        """Concatenate the text of every page of the PDF at *path*.

        Page texts are joined with no separator added between them, and the
        combined result is stripped of leading/trailing whitespace.
        """
        with fitz.open(path) as doc:
            # Join once instead of growing a string with += per page.
            text = "".join(page.get_text() for page in doc)
        return text.strip()

    def _parse_docx(self, path: str) -> str:
        """Return the paragraphs of the DOCX at *path*, one per line.

        Only ``doc.paragraphs`` is read.
        NOTE(review): text held in tables, headers or footers is presumably
        not covered by ``doc.paragraphs`` - confirm against python-docx docs.
        """
        doc = Document(path)
        text = "\n".join(para.text for para in doc.paragraphs)
        return text.strip()