CV-Extractor

Running

App Files Files Community

CV-Extractor / core /processing /dataframe.py

Sher1988

Remove Dockerfile, .gitattributes, LICENSE

6e51440 14 days ago

raw

history blame contribute delete

3.22 kB

	import pandas as pd
	from core.parsing.schema import Resume

	def resume_to_df(resume: Resume) -> pd.DataFrame:
	    """Lay out a resume's education, experience and projects side by side.

	    Row ``i`` of the result holds the i-th education entry (``edu_*``
	    columns), the i-th experience entry (``exp_*`` columns) and the i-th
	    project (``proj_*`` columns). A section with fewer than ``i + 1``
	    entries adds no keys to that row, so pandas leaves those cells as NaN.

	    Top-level fields (name, summary, contact, skill lists) are not part of
	    the output; only the three list sections are emitted.

	    Args:
	        resume: Object exposing ``model_dump()`` that returns a dict with
	            optional ``education``, ``experience`` and ``projects`` lists
	            of dicts (presumably a Pydantic model -- confirm against
	            ``core.parsing.schema.Resume``).

	    Returns:
	        A DataFrame with ``max(len(section), 1)`` rows over the three
	        sections; a resume with no entries still yields one empty row.

	    Raises:
	        KeyError: If a present entry lacks one of the keys read below.
	    """
	    r = resume.model_dump()  # plain dict of field name -> value

	    # Resolve each section once; a missing key and an explicit None are
	    # both treated as "no entries".
	    educations = r.get("education") or []
	    experiences = r.get("experience") or []
	    projects = r.get("projects") or []

	    # Always emit at least one row, even when every section is empty.
	    row_count = max(len(educations), len(experiences), len(projects), 1)

	    rows = []
	    for i in range(row_count):
	        row = {}

	        if i < len(educations):
	            e = educations[i]
	            row.update({
	                "edu_institution": e["institution"],
	                "edu_degree": e["degree"],
	                "edu_start": e["start_date"],
	                "edu_end": e["end_date"],
	            })

	        if i < len(experiences):
	            ex = experiences[i]
	            row.update({
	                "exp_title": ex["title"],
	                "exp_company": ex["company"],
	                "exp_start": ex["start_date"],
	                "exp_end": ex["end_date"],
	            })

	        if i < len(projects):
	            p = projects[i]
	            row.update({
	                "proj_name": p["name"],
	                "proj_desc": p["description"],
	                # Guard against None like every other list field here.
	                "proj_tech": ", ".join(p["technologies"] or []),
	                "proj_score": p["difficulty_score"],
	            })

	        rows.append(row)

	    return pd.DataFrame(rows)




	def resume_to_dfs(resume: Resume):
	    """Split a resume into four separate DataFrames, one per section.

	    Args:
	        resume: Object exposing ``model_dump()`` that returns a dict of the
	            resume fields (presumably a Pydantic model -- confirm against
	            ``core.parsing.schema.Resume``).

	    Returns:
	        A dict with the keys ``"base"``, ``"education"``, ``"experience"``
	        and ``"projects"``:

	        * ``base`` -- a single-row frame with the name, the summary, one
	          ``contact_<key>`` column per contact entry, and the three skill
	          or certification lists each joined with ``", "``.
	        * ``education`` / ``experience`` -- built directly from the dumped
	          lists of dicts; empty frames when the list is missing or empty.
	        * ``projects`` -- like the above, except that a list-valued
	          ``technologies`` entry is joined into one ``", "``-separated
	          string.
	    """
	    dumped = resume.model_dump()

	    def joined(field):
	        # A missing key and an explicit None both become an empty string.
	        return ", ".join(dumped.get(field) or [])

	    # Single-row summary: scalar fields first, then the flattened contact
	    # dict, then the joined list fields (column order follows insertion).
	    summary_row = {
	        "full_name": dumped.get("full_name"),
	        "summary": dumped.get("summary"),
	    }
	    contact = dumped.get("contact") or {}
	    for key, value in contact.items():
	        summary_row[f"contact_{key}"] = value
	    for field in ("ai_ml_skills", "technical_skills", "certifications"):
	        summary_row[field] = joined(field)

	    # Join each project's technology list in place so the table shows a
	    # plain string instead of a Python list.
	    project_rows = dumped.get("projects") or []
	    for project in project_rows:
	        tech = project.get("technologies")
	        if isinstance(tech, list):
	            project["technologies"] = ", ".join(tech)

	    return dict(
	        base=pd.DataFrame([summary_row]),
	        education=pd.DataFrame(dumped.get("education") or []),
	        experience=pd.DataFrame(dumped.get("experience") or []),
	        projects=pd.DataFrame(project_rows),
	    )