Spaces:

shravanijadhav264
/

JobShield-AI

Sleeping

JobShield-AI / resume_processor.py

Initial clean commit

984c70c 16 days ago

1.34 kB

	import re
	import os
	from PyPDF2 import PdfReader
	from docx import Document
	from utils.text_processing import clean_text

	def extract_text_from_pdf(pdf_path):
	text = ""
	try:
	with open(pdf_path, 'rb') as file:
	reader = PdfReader(file)
	for page in reader.pages:
	page_text = page.extract_text()
	if page_text:
	text += page_text + "\n"
	except Exception as e:
	print(f"Error processing PDF: {e}")
	return text

	def extract_text_from_docx(docx_path):
	text = ""
	try:
	doc = Document(docx_path)
	for para in doc.paragraphs:
	if para.text.strip():
	text += para.text + "\n"
	except Exception as e:
	print(f"Error processing DOCX: {e}")
	return text

	def process_resume(file_path):
	if file_path.endswith('.pdf'):
	text = extract_text_from_pdf(file_path)
	elif file_path.endswith('.docx'):
	text = extract_text_from_docx(file_path)
	else:
	try:
	with open(file_path, 'r', encoding='utf-8') as file:
	text = file.read()
	except:
	with open(file_path, 'r', encoding='latin-1') as file:
	text = file.read()
	return clean_text(text)

	def process_jd(file_path):
	return process_resume(file_path)