Spaces:

Chirag20
/

cvmatcher

Running

cvmatcher / src / tools / cv_parser.py

Initial deployment: job application intelligence agent

ec55f11 2 days ago

1.25 kB

	import fitz
	from docx import Document
	from crewai.tools import BaseTool
	from pydantic import BaseModel, Field
	from typing import Type


	class CVParserInput(BaseModel):
	    """Input schema holding the single argument of the CV parser tool."""

	    # Path of the CV to read; the description text is part of the schema.
	    file_path: str = Field(
	        description="Absolute path to the CV file (.pdf or .docx)",
	    )


	class CVParserTool(BaseTool):
	    """Tool that extracts the plain text of a CV stored as a PDF or DOCX file.

	    The file type is chosen from the path's extension (case-insensitively).
	    Problems are reported through the returned string rather than raised:
	    an unsupported extension yields a fixed message, and any ``Exception``
	    raised while reading yields ``"Error reading file: ..."``.
	    """

	    name: str = "CV Parser"
	    description: str = "Reads and extracts full text from a CV in PDF or DOCX format."
	    args_schema: Type[BaseModel] = CVParserInput

	    def _run(self, file_path: str) -> str:
	        """Return the text of the CV at *file_path*, or a message on failure.

	        Args:
	            file_path: Path to a ``.pdf`` or ``.docx`` file.

	        Returns:
	            The extracted text with surrounding whitespace stripped, the
	            "unsupported format" message, or an "Error reading file" message
	            carrying the text of the exception that was caught.
	        """
	        try:
	            # Match the extension on a lower-cased copy so "CV.PDF" or
	            # "Resume.Docx" are accepted; the parsers still receive the
	            # original, unmodified path.
	            lowered = file_path.lower()
	            if lowered.endswith(".pdf"):
	                return self._parse_pdf(file_path)
	            if lowered.endswith(".docx"):
	                return self._parse_docx(file_path)
	            return "Unsupported file format. Only PDF and DOCX supported."
	        except Exception as e:
	            # Deliberate best-effort: hand the failure back as text instead
	            # of propagating it to the caller.
	            return f"Error reading file: {e}"

	    def _parse_pdf(self, path: str) -> str:
	        """Return the concatenated text of every page of the PDF at *path*."""
	        with fitz.open(path) as doc:
	            # Join once instead of growing a string with += per page.
	            return "".join(page.get_text() for page in doc).strip()

	    def _parse_docx(self, path: str) -> str:
	        """Return the paragraphs of the DOCX at *path*, one per line."""
	        doc = Document(path)
	        # NOTE(review): only ``doc.paragraphs`` is read; text placed inside
	        # tables is presumably not included - confirm against python-docx.
	        return "\n".join(para.text for para in doc.paragraphs).strip()