cvmatcher / src /tools /cv_parser.py
Chirag20's picture
Initial deployment: job application intelligence agent
ec55f11
import fitz
from docx import Document
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from typing import Type
# Argument schema for the CV parser tool: a single string field holding the
# location of the CV on disk.
# NOTE(review): kept as `#` comments rather than a class docstring because
# pydantic may expose a model docstring as the schema description, which
# would change what consumers of this schema see -- confirm before converting.
class CVParserInput(BaseModel):
    # Path to the CV file; the description text below is part of the schema.
    file_path: str = Field(
        description="Absolute path to the CV file (.pdf or .docx)",
    )
# Tool that reads a CV file from disk and returns its text content.
# PDF files are read with PyMuPDF (`fitz`); DOCX files with python-docx.
# Failures are reported as a returned message string instead of a raised
# exception, so the caller always receives a `str`.
class CVParserTool(BaseTool):
    name: str = "CV Parser"
    description: str = "Reads and extracts full text from a CV in PDF or DOCX format."
    args_schema: Type[BaseModel] = CVParserInput

    def _run(self, file_path: str) -> str:
        """Extract the text of the CV at *file_path*.

        The parser is selected from the file extension, compared
        case-insensitively so that names such as ``resume.PDF`` or
        ``CV.Docx`` are accepted.

        Args:
            file_path: Path to a ``.pdf`` or ``.docx`` file.

        Returns:
            The extracted text with surrounding whitespace stripped, or a
            human-readable message when the extension is unsupported or
            reading the file fails.
        """
        try:
            # Only the comparison uses the lowercased copy; the path that is
            # opened is the one the caller supplied.
            extension_probe = file_path.lower()
            if extension_probe.endswith(".pdf"):
                return self._parse_pdf(file_path)
            if extension_probe.endswith(".docx"):
                return self._parse_docx(file_path)
        except Exception as e:
            # Deliberately broad: any failure is converted into a message
            # string rather than propagated to the caller.
            return f"Error reading file: {e}"
        return "Unsupported file format. Only PDF and DOCX supported."

    def _parse_pdf(self, path: str) -> str:
        """Return the concatenated text of every page in the PDF at *path*.

        Pages are joined with no separator, in document order, and the
        result is stripped of leading/trailing whitespace.
        """
        # The context manager closes the document even if a page fails.
        with fitz.open(path) as doc:
            text = "".join(page.get_text() for page in doc)
        return text.strip()

    def _parse_docx(self, path: str) -> str:
        """Return the paragraphs of the DOCX at *path*, one per line.

        The result is stripped of leading/trailing whitespace.
        """
        doc = Document(path)
        # NOTE(review): only `doc.paragraphs` is read; text held in tables is
        # presumably not included -- confirm against python-docx if CVs that
        # use table layouts come back incomplete.
        text = "\n".join(para.text for para in doc.paragraphs)
        return text.strip()