Spaces:

feliponi
/

hirly-ner

Sleeping

App Files Files Community

feliponi committed on Nov 12

Commit

30a1b76

verified ·

1 Parent(s): d9d3195

Upload app.py

Browse files

Files changed (1) hide show

app.py +130 -101

app.py CHANGED Viewed

@@ -1,160 +1,189 @@
 """
-app.py
-Interface Gradio para o Modelo de Extração de Entidades (SKILL, EXPERIENCE_DURATION)
-Carrega o modelo treinado e fornece uma UI para comparar CV e JD.
 """
 import gradio as gr
 import re
-from typing import List, Dict, Set
-# Importa o extrator que já criamos
-# HF Spaces irá executar isso da raiz, então o caminho 'scripts' está correto.
 from scripts.inference import EntityExtractor
-# --- 1. Carregamento do Modelo ---
-# Carrega o modelo na memória apenas uma vez, quando o app inicia.
-# Certifique-se de que este caminho está correto para onde o HF Spaces irá encontrá-lo.
-MODEL_PATH = "feliponi/hirly-ner-multi"
 try:
     extractor = EntityExtractor(MODEL_PATH)
-    print(f"Modelo carregado com sucesso de {MODEL_PATH}")
 except Exception as e:
-    print(f"ERRO CRÍTICO: Não foi possível carregar o modelo de {MODEL_PATH}.")
-    print("Certifique-se de que o modelo treinado está no diretório correto.")
-    # Se o modelo não carregar, o Gradio falhará, o que é esperado.
     extractor = None
-# --- 2. Lógica de Negócio (Nova) ---
 def parse_and_sum_experience(entities: List[Dict]) -> float:
     """
-    Analisa os spans de 'EXPERIENCE_DURATION' e os soma em anos.
-    Esta é uma lógica de negócio e pode ser complexa.
-    Exemplos de conversão:
-    - "5+ years" -> 5.0
-    - "6 months"  -> 0.5
-    - "3-5 anos"  -> 3.0 (pegamos o primeiro número)
-    - "two years" -> 2.0
     """
     total_experience = 0.0
-    # Mapeamento simples de palavras para números
     num_words = {
-        'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
-        'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10
     }
-    # Filtra apenas as entidades de experiência
-    durations = [e['entity'].lower() for e in entities if e['label'] == 'EXPERIENCE_DURATION']
     for text in durations:
         found_number = None
-        # 1. Tenta encontrar números (dígitos, ex: "5", "5.5", "3-5")
-        # Pega o primeiro número que encontrar
-        match = re.search(r'(\d+[\.,]\d+|\d+)', text)
         if match:
-            found_number = float(match.group(1).replace(',', '.'))
         else:
-            # 2. Tenta encontrar números por extenso
             for word, number in num_words.items():
                 if word in text:
                     found_number = number
                     break
         if found_number is not None:
-            # 3. Verifica a unidade (meses ou anos)
-            if 'month' in text or 'mes' in text:
                 total_experience += found_number / 12
             else:
-                # Assume "anos" (years) como padrão
                 total_experience += found_number
     return round(total_experience, 1)
-def analyze_cv_and_jd(cv_text: str, jd_text: str) -> (str, str, str, List[str], List[str]):
     """
-    Função principal que o Gradio irá chamar.
-    Processa o CV e o JD, encontra skills, soma experiências e compara.
     """
-    if not extractor:
-        return "ERRO: Modelo não carregado.", "", "", [], []
-    # 1. Processa ambos os textos
-    cv_entities = extractor.extract_entities_with_details(cv_text, confidence_threshold=0.7)
-    jd_entities = extractor.extract_entities_with_details(jd_text, confidence_threshold=0.7)
-    # 2. Soma a experiência (lógica de negócio)
-    cv_exp = parse_and_sum_experience(cv_entities)
-    jd_exp = parse_and_sum_experience(jd_entities)
-    # 3. Compara as skills
-    cv_skills = {e['entity'].lower() for e in cv_entities if e['label'] == 'SKILL'}
-    jd_skills = {e['entity'].lower() for e in jd_entities if e['label'] == 'SKILL'}
-    matching_skills = cv_skills.intersection(jd_skills)
-    cv_only_skills = cv_skills - jd_skills
-    jd_only_skills = jd_skills - cv_skills
-    # 4. Formata a saída
-    match_output = f"""
-    ## 🚀 Match Analysis
-    **Compatible Skills between Resume and Job Description: {len(matching_skills)}**
-    ---
-    {', '.join(sorted(list(matching_skills))) if matching_skills else 'There is no skills compatiable found.'}
     """
     cv_exp_str = f"{cv_exp} years"
-    jd_exp_str = f"{jd_exp} years (The extraction could list nay requirement, like: '5+ years')"
-    return (
-        match_output,
-        cv_exp_str,
-        jd_exp_str,
-        sorted(list(cv_only_skills)),
-        sorted(list(jd_only_skills))
-    )
-# --- 3. Definição da Interface Gradio ---
-with gr.Blocks(title="Hirly - Resume & Job Description Analysis") as demo:
-    gr.Markdown("# 🚀 Resume & Job Description Analysis")
     gr.Markdown(
-        "Enter a resume and a job description for analysis "
-        "skills, experience to see they're compatible"
     )
     with gr.Row():
         with gr.Column():
-            cv_input = gr.Textbox(lines=20, label="Resume Text")
         with gr.Column():
-            jd_input = gr.Textbox(lines=20, label="Job Description Text (JD)")
-    analyze_button = gr.Button("Execute Analysis", variant="primary")
     gr.Markdown("---")
     with gr.Row():
         with gr.Column(scale=2):
-            match_output = gr.Markdown(label="Match Results")
         with gr.Column(scale=1):
-            cv_exp_output = gr.Textbox(label="Total Experience from Resume", interactive=False)
-            jd_exp_output = gr.Textbox(label="Total Experience required from JD", interactive=False)
     with gr.Row():
-        cv_only_output = gr.JSON(label="Resume Skills")
-        jd_only_output = gr.JSON(label="JD Skills")
-    # Conecta o botão à função
     analyze_button.click(
         fn=analyze_cv_and_jd,
         inputs=[cv_input, jd_input],
-        outputs=[match_output, cv_exp_output, jd_exp_output, cv_only_output, jd_only_output]
     )
 if __name__ == "__main__":

 """
+app.py (MULTI-LABEL V2 - English UI)
+Gradio interface for the Entity Extraction Model
+(SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION)
+Loads the trained model and provides a UI to compare CV and JD.
 """
 import gradio as gr
 import re
+from typing import List, Dict, Set, Tuple
+# Import the extractor we already created
 from scripts.inference import EntityExtractor
# --- 1. Model Loading ---
# Path handed to EntityExtractor; points at the locally trained model.
MODEL_PATH = "models/hirly_ner_multi"

# The model is loaded once, at import time. A failure is reported together
# with its cause but is not re-raised: `extractor` is left as None so the
# module still imports, and callers must check for None before using it.
try:
    extractor = EntityExtractor(MODEL_PATH)
    print(f"Model loaded successfully from {MODEL_PATH}")
except Exception as e:
    print(f"CRITICAL ERROR: Could not load model from {MODEL_PATH}.")
    print("Ensure the trained model is in the correct directory.")
    # Surface the underlying exception; without it the failure is undiagnosable.
    print(f"Cause: {type(e).__name__}: {e}")
    extractor = None
# --- 2. Business Logic ---
# Spelled-out numbers recognised inside a duration span.
_NUMBER_WORDS = {
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10,
    "eleven": 11,
    "twelve": 12,
    "thirteen": 13,
    "fourteen": 14,
    "fifteen": 15,
    "sixteen": 16,
    "seventeen": 17,
    "eighteen": 18,
    "nineteen": 19,
    "twenty": 20,
}
# First numeric literal in the span; accepts "." or "," as decimal separator.
_DIGITS_RE = re.compile(r"(\d+[\.,]\d+|\d+)")
# Whole-word match only, so "eighteen" is never read as "eight".
_NUMBER_WORD_RE = re.compile(r"\b(" + "|".join(_NUMBER_WORDS) + r")\b")
# Month units: English "month(s)" and Portuguese "mes(es)" / "mês".
_MONTH_RE = re.compile(r"\b(?:month|mes|mês)")


def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Sum every 'EXPERIENCE_DURATION' entity into a total number of years.

    For each entity whose "label" is 'EXPERIENCE_DURATION', the lower-cased
    "entity" text is scanned for the first numeric literal ("5+ years" -> 5,
    "3-5 anos" -> 3, "2,5 years" -> 2.5). If there is none, the first
    spelled-out number from one to twenty is used ("two years" -> 2).
    A span that mentions a month unit is divided by 12; anything else is
    treated as years. Spans without a recognisable number are skipped.

    Args:
        entities: Dicts carrying at least "label" and "entity" keys. Entries
            with another label, or without an "entity" value, are ignored.

    Returns:
        The summed experience in years, rounded to one decimal place.
    """
    total_experience = 0.0
    for entity in entities:
        if entity.get("label") != "EXPERIENCE_DURATION":
            continue
        text = str(entity.get("entity") or "").lower()

        digits = _DIGITS_RE.search(text)
        if digits:
            amount = float(digits.group(1).replace(",", "."))
        else:
            word = _NUMBER_WORD_RE.search(text)
            if word is None:
                # No number of either kind: nothing to add for this span.
                continue
            amount = float(_NUMBER_WORDS[word.group(1)])

        if _MONTH_RE.search(text):
            total_experience += amount / 12
        else:
            # Years are the default unit when no month unit is mentioned.
            total_experience += amount
    return round(total_experience, 1)
# Labels collected from the model output, in display order.
_ENTITY_LABELS = ("SKILL", "SOFT_SKILL", "LANG", "CERT", "EXPERIENCE_DURATION")
# Labels compared between CV and JD (durations are summed, not matched).
_MATCH_LABELS = ("SKILL", "SOFT_SKILL", "LANG", "CERT")
# Minimum confidence passed to the extractor for both documents.
_CONFIDENCE_THRESHOLD = 0.7


def _group_entities(entities: List[Dict]) -> Dict[str, Set[str]]:
    """Bucket lower-cased entity texts by label, dropping unknown labels."""
    grouped: Dict[str, Set[str]] = {label: set() for label in _ENTITY_LABELS}
    for entity in entities:
        bucket = grouped.get(entity.get("label"))
        value = entity.get("entity")
        if bucket is not None and value:
            bucket.add(value.lower())
    return grouped


def extract_and_group_entities(
    text: str, confidence_threshold: float
) -> Dict[str, Set[str]]:
    """
    Extract entities from `text` and group them by label.

    Args:
        text: Document to run through the module-level `extractor`.
        confidence_threshold: Forwarded unchanged to
            `extractor.extract_entities_with_details`.

    Returns:
        A dict with one (possibly empty) set of lower-cased entity texts for
        each of SKILL, SOFT_SKILL, LANG, CERT and EXPERIENCE_DURATION.
    """
    entities = extractor.extract_entities_with_details(text, confidence_threshold)
    return _group_entities(entities)


def analyze_cv_and_jd(
    cv_text: str, jd_text: str
) -> Tuple[str, str, str, Dict[str, List[str]], Dict[str, List[str]]]:
    """
    Compare a resume with a job description; this is the Gradio click handler.

    Each text is run through the extractor once. The resulting entity list is
    used both for the per-label sets and for the experience total, so the
    duration spans are summed from the raw list rather than from the
    de-duplicated sets.

    Args:
        cv_text: Resume text.
        jd_text: Job description text.

    Returns:
        A 5-tuple of: the Markdown match report, the CV experience string,
        the JD experience string, and two dicts mapping each non-empty label
        (excluding EXPERIENCE_DURATION) to its sorted entity list, for the CV
        and the JD respectively. If the model failed to load, an error
        message followed by empty values is returned instead.
    """
    if extractor is None:
        return "ERROR: Model not loaded.", "", "", {}, {}

    # 1. Run the model once per document and reuse the result below.
    cv_entities = extractor.extract_entities_with_details(
        cv_text, _CONFIDENCE_THRESHOLD
    )
    jd_entities = extractor.extract_entities_with_details(
        jd_text, _CONFIDENCE_THRESHOLD
    )
    cv_groups = _group_entities(cv_entities)
    jd_groups = _group_entities(jd_entities)

    # 2. Sum experience from the raw entity lists.
    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    # 3. Build the Markdown report. Parts are separated by blank lines: a
    #    "---" placed directly under a text line is a setext heading
    #    underline in Markdown, not a horizontal rule.
    sections = ["## 🚀 Match Analysis"]
    for label in _MATCH_LABELS:
        matching = cv_groups[label] & jd_groups[label]
        sections.append(
            f"**Matching {label.replace('_', ' ')}S: {len(matching)}**"
        )
        if matching:
            sections.append(f"_{', '.join(sorted(matching))}_")
        else:
            sections.append("_No matching items found._")
        sections.append("---")
    match_output = "\n\n".join(sections) + "\n"

    # 4. JSON panels: non-empty labels only, durations excluded.
    cv_json_output = {
        label: sorted(values)
        for label, values in cv_groups.items()
        if values and label != "EXPERIENCE_DURATION"
    }
    jd_json_output = {
        label: sorted(values)
        for label, values in jd_groups.items()
        if values and label != "EXPERIENCE_DURATION"
    }

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"
    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)
# --- 3. Gradio Interface Definition (All English) ---
# Layout, top to bottom: title and intro, the two input text boxes side by
# side, the action button, a divider, the match report next to the two
# experience totals, and finally the two JSON entity panels.
with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
    gr.Markdown("# 🚀 Resume vs. Job Description Analyzer")
    gr.Markdown(
        "Provide the text from a Resume (CV) and a Job Description (JD) "
        "to extract skills, soft skills, languages, certifications, "
        "years of experience, and see their compatibility."
    )

    # Inputs
    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(label="Resume (CV) Text", lines=20)
        with gr.Column():
            jd_input = gr.Textbox(label="Job Description (JD) Text", lines=20)

    analyze_button = gr.Button("Analyze Compatibility", variant="primary")
    gr.Markdown("---")

    # Results: report on the left (wider), experience totals on the right.
    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Analysis")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(
                label="Total Experience (CV)", interactive=False
            )
            jd_exp_output = gr.Textbox(
                label="Total Experience (JD)", interactive=False
            )

    with gr.Row():
        cv_only_output = gr.JSON(label="Entities Found in CV")
        jd_only_output = gr.JSON(label="Entities Required by JD")

    # The handler returns five values, mapped to these components in order.
    analyze_button.click(
        analyze_cv_and_jd,
        inputs=[cv_input, jd_input],
        outputs=[
            match_output,
            cv_exp_output,
            jd_exp_output,
            cv_only_output,
            jd_only_output,
        ],
    )
 if __name__ == "__main__":