DEEPAN-C
/

Resume_analysis_RAG

Document Question Answering

Model card Files Files and versions

Resume_analysis_RAG / create_pdfs.py

DEEPAN-C's picture

Upload folder using huggingface_hub

e4f4981 verified 9 months ago

History Blame Contribute Delete

1.72 kB

	from fpdf import FPDF
	import os

	def create_pdf_from_txt(txt_path, pdf_path):
	    """Render a plain-text file as a simple single-font PDF.

	    The text is read as UTF-8, bullet and en-dash characters are mapped to
	    ASCII look-alikes, and each input line is written as one full-width
	    cell (height 10) in 12pt Arial with automatic page breaks.

	    Args:
	        txt_path: Path of the text file to read.
	        pdf_path: Path the generated PDF is written to.

	    Raises:
	        OSError: If ``txt_path`` cannot be opened for reading.
	    """
	    import warnings

	    # Decode explicitly as UTF-8. With the platform default encoding the
	    # bullet/en-dash characters handled below are mis-decoded on non-UTF-8
	    # locales, so the replacements would never match. Undecodable bytes are
	    # replaced instead of aborting, keeping the conversion best-effort.
	    with open(txt_path, 'r', encoding='utf-8', errors='replace') as file:
	        content = file.read()

	    # Replace bullets and other special characters
	    content = content.replace('•', '*')
	    content = content.replace('–', '-')

	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_auto_page_break(auto=True, margin=15)
	    pdf.set_font("Arial", size=12)

	    for line_number, line in enumerate(content.split('\n'), start=1):
	        # Anything outside Latin-1 becomes '?'. With errors='replace' this
	        # round-trip cannot raise, so it stays outside the try block.
	        encoded_line = line.encode('latin-1', 'replace').decode('latin-1')
	        try:
	            pdf.cell(0, 10, txt=encoded_line, ln=True)
	        except Exception as exc:
	            # Still best-effort (one bad line must not abort the document),
	            # but report the loss: callers may delete the source afterwards.
	            warnings.warn(
	                f"{txt_path}: line {line_number} could not be rendered "
	                f"and was skipped ({exc!r})",
	                stacklevel=2,
	            )

	    pdf.output(pdf_path)

	def _convert_txt_files_in(directory):
	    """Convert each ``.txt`` file directly inside *directory* to a PDF, then delete the text file."""
	    suffix = '.txt'
	    for name in os.listdir(directory):
	        if not name.endswith(suffix):
	            continue
	        txt_path = os.path.join(directory, name)
	        # Swap only the trailing extension. str.replace would also rewrite
	        # any earlier '.txt' inside the name ('a.txt.v2.txt' -> 'a.pdf.v2.pdf').
	        pdf_path = os.path.join(directory, name[:-len(suffix)] + '.pdf')
	        create_pdf_from_txt(txt_path, pdf_path)
	        os.remove(txt_path)  # Remove the txt file after creating PDF


	def main():
	    """Convert the text files in the job-description and resume folders to PDFs.

	    Processes ``JOB_DESCRIPTIONS`` first, then ``DATA_resume``; both paths are
	    relative to the current working directory. Each ``.txt`` file is replaced
	    by a PDF of the same base name, and the original text file is deleted.

	    Raises:
	        FileNotFoundError: If either directory does not exist.
	    """
	    for directory in ("JOB_DESCRIPTIONS", "DATA_resume"):
	        _convert_txt_files_in(directory)

	# Run the conversion only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()