Sher1988 committed on
Commit
6e51440
·
1 Parent(s): 5576849

Remove Dockerfile, .gitattributes, LICENSE

Browse files
Files changed (3) hide show
  1. app.py +57 -14
  2. core/parsing/extractor.py +1 -0
  3. core/processing/dataframe.py +48 -8
app.py CHANGED
@@ -4,21 +4,23 @@ import tempfile
4
 
5
  from core.ingestion.docling_loader import load_and_convert_cv
6
  from core.parsing.extractor import extract_resume
7
- from core.processing.dataframe import resume_to_df
8
 
9
  st.title("CV Analyzer")
10
 
11
  # ---- session state init ----
12
  if "processed" not in st.session_state:
13
  st.session_state.processed = False
14
- if "df" not in st.session_state:
15
- st.session_state.df = None
16
 
17
  uploaded_file = st.file_uploader("Upload CV (PDF)", type=["pdf"])
18
 
19
- if st.button("Upload New CV"):
 
20
  st.session_state.processed = False
21
- st.session_state.df = None
 
22
 
23
  # ---- process only once ----
24
  if uploaded_file and not st.session_state.processed:
@@ -28,22 +30,63 @@ if uploaded_file and not st.session_state.processed:
28
 
29
  text = load_and_convert_cv(pdf_path)
30
  data = extract_resume(text)
31
- df = resume_to_df(data)
32
 
33
- st.session_state.df = df
 
34
  st.session_state.processed = True
35
 
36
  # ---- display from session (no recompute) ----
37
- if st.session_state.processed and st.session_state.df is not None:
38
- df = st.session_state.df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- st.subheader("Extracted Data")
41
- st.dataframe(df)
42
 
43
- csv = df.to_csv(index=False).encode("utf-8")
 
 
 
 
 
 
 
 
 
44
  st.download_button(
45
  "Download CSV",
46
  data=csv,
47
- file_name="cv_data.csv",
48
  mime="text/csv"
49
- )
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  from core.ingestion.docling_loader import load_and_convert_cv
6
  from core.parsing.extractor import extract_resume
7
+ from core.processing.dataframe import resume_to_df, resume_to_dfs
8
 
9
  st.title("CV Analyzer")
10
 
11
  # ---- session state init ----
12
  if "processed" not in st.session_state:
13
  st.session_state.processed = False
14
+ if "dfs" not in st.session_state:
15
+ st.session_state.dfs = None
16
 
17
  uploaded_file = st.file_uploader("Upload CV (PDF)", type=["pdf"])
18
 
19
+
20
+ if not uploaded_file:
21
  st.session_state.processed = False
22
+ st.session_state.dfs = None
23
+
24
 
25
  # ---- process only once ----
26
  if uploaded_file and not st.session_state.processed:
 
30
 
31
  text = load_and_convert_cv(pdf_path)
32
  data = extract_resume(text)
33
+ dfs = resume_to_dfs(data)
34
 
35
+ st.session_state.data = data
36
+ st.session_state.dfs = dfs
37
  st.session_state.processed = True
38
 
39
  # ---- display from session (no recompute) ----
40
+ if st.session_state.processed and st.session_state.dfs is not None:
41
+ dfs = st.session_state.dfs
42
+ data = st.session_state.data # Ensure data is pulled from state
43
+
44
+
45
+ # Extract row from 'base' dataframe (assuming it's a single-row DF)
46
+ base_data = dfs['base'].iloc[0]
47
+
48
+
49
+ st.subheader("Candidate Profile")
50
+ col_spacer, col_content = st.columns([0.01, 0.99])
51
+ with col_content:
52
+ st.write(f"**Name:** {base_data.get('full_name', 'N/A')}")
53
+
54
+ # Iterate through contact fields (the ones prefixed with contact_)
55
+ contact_fields = {k.replace("contact_", "").title(): v for k, v in base_data.items() if k.startswith("contact_") and v}
56
+ for label, value in contact_fields.items():
57
+ st.write(f"**{label}:** {value}")
58
+
59
+ st.write(f"**AI/ML Skills:** {base_data.get('ai_ml_skills') or 'N/A'}")
60
+ st.write(f"**Technical Skills:** {base_data.get('technical_skills') or 'N/A'}")
61
+ st.write(f"**Certifications:** {base_data.get('certifications') or 'N/A'}")
62
+
63
+ if base_data.get("summary"):
64
+ st.info(f"**Summary:** {base_data['summary']}")
65
 
 
 
66
 
67
+ # Display other tables (Experience, Education, etc.)
68
+ for label, df in dfs.items():
69
+ if label == "base":
70
+ continue
71
+ st.subheader(label.replace("_", " ").title())
72
+ st.dataframe(df, use_container_width=True)
73
+
74
+ # Download Button
75
+ df_full = resume_to_df(data)
76
+ csv = df_full.to_csv(index=False).encode("utf-8")
77
  st.download_button(
78
  "Download CSV",
79
  data=csv,
80
+ file_name=f"analyzed_{uploaded_file.name}.csv",
81
  mime="text/csv"
82
+ )
83
+
84
+
85
+
86
+
87
+ # "full_name": r.get("full_name"),
88
+ # "summary": r.get("summary"),
89
+ # **{f"contact_{k}": v for k, v in (r.get("contact") or {}).items()},
90
+ # "ai_ml_skills"
91
+ # "technical_skills"
92
+ # "certifications"
core/parsing/extractor.py CHANGED
@@ -25,6 +25,7 @@ agent = Agent(
25
  model=model,
26
  system_prompt=(
27
  'You are an expert resume extractor.'
 
28
  'Do NOT infer or hallucinate missing sections.'
29
  'If a section is not explicitly present, return null or empty list.'
30
  ),
 
25
  model=model,
26
  system_prompt=(
27
  'You are an expert resume extractor.'
28
+ 'If the context is not a Resume return null and DO NOT infer or hallucinate.'
29
  'Do NOT infer or hallucinate missing sections.'
30
  'If a section is not explicitly present, return null or empty list.'
31
  ),
core/processing/dataframe.py CHANGED
@@ -1,12 +1,14 @@
1
  import pandas as pd
 
2
 
3
- def resume_to_df(resume):
4
- r = resume.dict()
 
5
 
6
  base = {
7
  "full_name": r["full_name"],
8
  "summary": r["summary"],
9
- **{f"contact_{k}": v for k, v in r["contact"].items()},
10
  "ai_ml_skills": ", ".join(r.get("ai_ml_skills", []) or []),
11
  "technical_skills": ", ".join(r.get("technical_skills", []) or []),
12
  "certifications": ", ".join(r.get("certifications", []) or [])
@@ -19,18 +21,17 @@ def resume_to_df(resume):
19
  len(r.get("education") or []),
20
  len(r.get("experience") or []),
21
  len(r.get("projects") or []),
22
- 1
23
  )
24
- print('max_len: ', max_len)
25
 
26
  for i in range(max_len):
27
- row = base.copy()
28
 
29
  # education
30
  educations = r.get("education", []) or []
31
  if i < len(educations):
32
  e = educations[i]
33
- row.update({
34
  "edu_institution": e["institution"],
35
  "edu_degree": e["degree"],
36
  "edu_start": e["start_date"],
@@ -61,4 +62,43 @@ def resume_to_df(resume):
61
 
62
  rows.append(row)
63
 
64
- return pd.DataFrame(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ from core.parsing.schema import Resume
3
 
4
+ def resume_to_df(resume: Resume):
5
+ # r = resume.dict()
6
+ r = resume.model_dump() # Dictionary -> key, value pairs
7
 
8
  base = {
9
  "full_name": r["full_name"],
10
  "summary": r["summary"],
11
+ **{f"contact_{k}": v for k, v in r["contact"].items() if v != None},
12
  "ai_ml_skills": ", ".join(r.get("ai_ml_skills", []) or []),
13
  "technical_skills": ", ".join(r.get("technical_skills", []) or []),
14
  "certifications": ", ".join(r.get("certifications", []) or [])
 
21
  len(r.get("education") or []),
22
  len(r.get("experience") or []),
23
  len(r.get("projects") or []),
24
+ 1 # at least one row.
25
  )
 
26
 
27
  for i in range(max_len):
28
+ row = {} #base.copy()
29
 
30
  # education
31
  educations = r.get("education", []) or []
32
  if i < len(educations):
33
  e = educations[i]
34
+ row.update({ # row |= {}
35
  "edu_institution": e["institution"],
36
  "edu_degree": e["degree"],
37
  "edu_start": e["start_date"],
 
62
 
63
  rows.append(row)
64
 
65
+ return pd.DataFrame(rows)
66
+
67
+
68
+
69
+
70
def _join_items(values):
    """Return *values* joined with ", "; None or an empty value gives ""."""
    return ", ".join(values or [])


def _project_row(project):
    """Return a copy of *project* with a list/tuple 'technologies' joined into a string."""
    technologies = project.get("technologies")
    if isinstance(technologies, (list, tuple)):
        # Re-using the existing key keeps its position, so column order is stable.
        return {**project, "technologies": ", ".join(technologies)}
    return dict(project)


def resume_to_dfs(resume: "Resume") -> "dict[str, pd.DataFrame]":
    """Split a parsed resume into one DataFrame per section.

    Args:
        resume: Object whose ``model_dump()`` returns a dict. The keys read
            here are ``full_name``, ``summary``, ``contact``,
            ``ai_ml_skills``, ``technical_skills``, ``certifications``,
            ``education``, ``experience`` and ``projects``; any of them may
            be missing or None.

    Returns:
        A dict with the keys ``"base"``, ``"education"``, ``"experience"``
        and ``"projects"``, in that order. ``"base"`` is always a single-row
        frame; the other frames are empty when their section is missing.
    """
    r = resume.model_dump()

    # 1. Base info: top-level scalars, the flattened 'contact' dict
    #    (keys prefixed with "contact_") and the comma-joined skill lists.
    base_row = {
        "full_name": r.get("full_name"),
        "summary": r.get("summary"),
        **{f"contact_{k}": v for k, v in (r.get("contact") or {}).items()},
        "ai_ml_skills": _join_items(r.get("ai_ml_skills")),
        "technical_skills": _join_items(r.get("technical_skills")),
        "certifications": _join_items(r.get("certifications")),
    }

    # 2. Projects: build fresh rows rather than rewriting the dumped dicts,
    #    so the data returned by model_dump() is left untouched.
    project_rows = [_project_row(p) for p in (r.get("projects") or [])]

    return {
        "base": pd.DataFrame([base_row]),
        "education": pd.DataFrame(r.get("education") or []),
        "experience": pd.DataFrame(r.get("experience") or []),
        "projects": pd.DataFrame(project_rows),
    }