Spaces:

parthib07
/

SMART_AI_RESUME

Sleeping

App Files Files Community

SMART_AI_RESUME / utils /resume_parser.py

parthib07

Upload 531 files

d7d3dff verified about 2 months ago

raw

history blame contribute delete

2.74 kB

	import pypdf
	import docx
	import re
	from io import BytesIO

	class ResumeParser:
	    """Extract plain text from PDF/DOCX resumes and flag known skill keywords.

	    Extraction is best-effort: a file that cannot be read yields an empty
	    string (and a printed error message) rather than an exception.
	    """

	    # Keywords searched for by parse(); detected skills are reported in this order.
	    SKILL_KEYWORDS = (
	        'python', 'java', 'javascript', 'html', 'css', 'sql', 'react', 'angular', 'vue',
	        'node', 'express', 'django', 'flask', 'spring', 'docker', 'kubernetes', 'aws',
	        'azure', 'git', 'jenkins', 'jira',
	    )

	    @staticmethod
	    def _read_bytes(source):
	        """Return the raw content of *source* (a file-like object or bytes).

	        File-like objects are rewound before reading, so a stream that was
	        already consumed by the caller is still read in full, and rewound
	        again afterwards so the caller can reuse it.
	        """
	        if not hasattr(source, 'read'):
	            # Not file-like: assumed to already be the raw bytes.
	            return source
	        rewind = getattr(source, 'seek', None)
	        if rewind is not None:
	            rewind(0)
	        data = source.read()
	        if rewind is not None:
	            rewind(0)
	        return data

	    @staticmethod
	    def _mentions_skill(text_lower, skill):
	        """Return True if *skill* occurs in *text_lower* as a standalone term.

	        A match must not be glued to other letters, so 'java' is not found in
	        'javascript' nor 'git' in 'digital'. Digits and punctuation may follow
	        ('html5', 'node.js'), and a trailing 'js' is tolerated ('reactjs').
	        Compound names such as 'mysql' are deliberately not counted as 'sql'.
	        """
	        pattern = rf"(?<![a-z]){re.escape(skill)}(?:js)?(?![a-z])"
	        return re.search(pattern, text_lower) is not None

	    def extract_text_from_pdf(self, pdf_file):
	        """Return the text of a PDF, one page after another, or "" on failure.

	        Args:
	            pdf_file: A file-like object with ``read()`` or the PDF's raw bytes.

	        Returns:
	            The stripped text of all pages joined by newlines. Pages without
	            extractable text contribute an empty line.
	        """
	        try:
	            reader = pypdf.PdfReader(BytesIO(self._read_bytes(pdf_file)))
	            pages = (page.extract_text() or "" for page in reader.pages)
	            return "\n".join(pages).strip()
	        except Exception as e:
	            # Best-effort boundary: report the problem and fall back to no text.
	            print(f"Error extracting text from PDF: {e}")
	            return ""

	    def extract_text_from_docx(self, docx_file):
	        """Return the paragraph text of a DOCX document, or "" on failure.

	        Args:
	            docx_file: A file-like object with ``read()`` or the document's
	                raw bytes.

	        Returns:
	            The stripped text of all paragraphs joined by newlines.
	        """
	        try:
	            doc = docx.Document(BytesIO(self._read_bytes(docx_file)))
	            return "\n".join(paragraph.text for paragraph in doc.paragraphs).strip()
	        except Exception as e:
	            # Best-effort boundary: report the problem and fall back to no text.
	            print(f"Error extracting text from DOCX: {e}")
	            return ""

	    def extract_text(self, file):
	        """Dispatch to the PDF or DOCX extractor based on the file's extension.

	        Args:
	            file: A file-like object exposing a ``name`` attribute.

	        Returns:
	            The extracted text, or "" when the extension is neither ``.pdf``
	            nor ``.docx`` (compared case-insensitively).
	        """
	        # Reset file pointer to beginning
	        if hasattr(file, 'seek'):
	            file.seek(0)

	        # Lower-case so that "CV.PDF" or "resume.Docx" are recognised as well.
	        filename = str(getattr(file, 'name', '') or '').lower()
	        if filename.endswith('.pdf'):
	            return self.extract_text_from_pdf(file)
	        if filename.endswith('.docx'):
	            return self.extract_text_from_docx(file)
	        return ""

	    def parse(self, file):
	        """Extract the resume text and run simple keyword-based skill detection.

	        Args:
	            file: A file-like object accepted by ``extract_text``.

	        Returns:
	            A dict with keys ``"skills"`` (keywords from ``SKILL_KEYWORDS``
	            found in the text, in that order), ``"experience"`` and
	            ``"education"`` (always empty lists; not populated here) and
	            ``"raw_text"`` (the extracted text).
	        """
	        text = self.extract_text(file)
	        text_lower = text.lower()
	        skills = [
	            skill for skill in self.SKILL_KEYWORDS
	            if self._mentions_skill(text_lower, skill)
	        ]

	        return {
	            "skills": skills,
	            "experience": [],
	            "education": [],
	            "raw_text": text
	        }