File size: 2,947 Bytes
153f2a7
 
 
 
 
 
6e51440
153f2a7
09eb4cc
153f2a7
 
 
 
6e51440
 
153f2a7
 
 
6e51440
 
153f2a7
6e51440
 
153f2a7
 
 
 
 
 
 
 
 
6e51440
153f2a7
6e51440
 
153f2a7
 
 
6e51440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153f2a7
 
6e51440
 
 
 
 
 
 
 
 
 
153f2a7
 
 
6e51440
153f2a7
6e51440
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import tempfile

import pandas as pd
import streamlit as st

from core.ingestion.docling_loader import load_and_convert_cv
from core.parsing.extractor import extract_resume
from core.processing.dataframe import resume_to_df, resume_to_dfs

st.title("📊 Resume Details Extractor")

# ---- session state init ----
# Streamlit re-executes this script on every interaction, so parsed results
# are kept in session state instead of plain module-level variables.
if "processed" not in st.session_state:
    st.session_state.processed = False
if "dfs" not in st.session_state:
    st.session_state.dfs = None
if "file_key" not in st.session_state:
    # Identity of the upload that the cached results were computed from.
    st.session_state.file_key = None

uploaded_file = st.file_uploader("Upload CV (PDF)", type=["pdf"])

# Name + size identify the current upload. NOTE: two different files sharing
# both name and byte size would not be told apart by this key.
file_key = (
    (uploaded_file.name, uploaded_file.size) if uploaded_file is not None else None
)

# Drop cached results when the file is removed OR swapped for another one;
# otherwise the previous CV's tables would keep being displayed.
if file_key is None or file_key != st.session_state.file_key:
    st.session_state.processed = False
    st.session_state.dfs = None


# ---- process only once ----
if uploaded_file is not None and not st.session_state.processed:
    # The loader takes a filesystem path, so the upload is spilled to a
    # temporary PDF. delete=False lets it be reopened by path after the
    # handle is closed; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        # getvalue() returns the whole buffer regardless of stream position.
        tmp.write(uploaded_file.getvalue())
        pdf_path = tmp.name

    try:
        text = load_and_convert_cv(pdf_path)
        data = extract_resume(text)
    finally:
        # Always remove the temp file, even when conversion/extraction fails.
        try:
            os.unlink(pdf_path)
        except OSError:
            pass  # best-effort cleanup; a leftover temp file must not crash the app

    dfs = resume_to_dfs(data)

    st.session_state.data = data
    st.session_state.dfs = dfs
    st.session_state.file_key = file_key
    st.session_state.processed = True

# ---- display from session (no recompute) ----
# Render only after a CV has been processed. Everything shown is read back
# from session state, so reruns triggered by widgets do not re-parse the PDF.
if st.session_state.processed and st.session_state.dfs is not None:
    dfs = st.session_state.dfs
    data = st.session_state.data  # Ensure data is pulled from state

    # First row of the 'base' dataframe (assuming it's a single-row DF)
    base_data = dfs['base'].iloc[0]

    st.subheader("Candidate Profile")
    # Narrow first column acts as a spacer; only the second one gets content.
    col_spacer, col_content = st.columns([0.01, 0.99])
    with col_content:
        # `or` instead of a .get() default: a present-but-empty/None value
        # must also fall back to 'N/A', matching the skill fields below.
        st.write(f"**Name:** {base_data.get('full_name') or 'N/A'}")

        # Show every non-empty contact field (columns prefixed with contact_),
        # labelled with the prefix stripped and title-cased.
        contact_fields = {
            k.replace("contact_", "").title(): v
            for k, v in base_data.items()
            if k.startswith("contact_") and v
        }
        for label, value in contact_fields.items():
            st.write(f"**{label}:** {value}")

        st.write(f"**AI/ML Skills:** {base_data.get('ai_ml_skills') or 'N/A'}")
        st.write(f"**Technical Skills:** {base_data.get('technical_skills') or 'N/A'}")
        st.write(f"**Certifications:** {base_data.get('certifications') or 'N/A'}")

        # Summary is optional; render it only when present.
        if base_data.get("summary"):
            st.info(f"**Summary:** {base_data['summary']}")

    # Display other tables (Experience, Education, etc.); 'base' was already
    # rendered above as the profile section.
    for label, df in dfs.items():
        if label == "base":
            continue
        st.subheader(label.replace("_", " ").title())
        st.dataframe(df, use_container_width=True)

    # Download Button: export the resume as one CSV built by resume_to_df.
    df_full = resume_to_df(data)
    csv = df_full.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download CSV",
        data=csv,
        file_name=f"analyzed_{uploaded_file.name}.csv",
        mime="text/csv"
    )




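# Reference notes: fields of the 'base' row that the profile section reads.
# (Presumably mirrors how core.processing.dataframe builds that row — verify there.)
# "full_name": r.get("full_name"),
# "summary": r.get("summary"),
# **{f"contact_{k}": v for k, v in (r.get("contact") or {}).items()},
# "ai_ml_skills"
# "technical_skills"
# "certifications"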