feliponi committed on
Commit
30a1b76
·
verified ·
1 Parent(s): d9d3195

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -101
app.py CHANGED
@@ -1,160 +1,189 @@
1
  """
2
- app.py
3
 
4
- Interface Gradio para o Modelo de Extração de Entidades (SKILL, EXPERIENCE_DURATION)
5
- Carrega o modelo treinado e fornece uma UI para comparar CV e JD.
 
6
  """
7
 
8
  import gradio as gr
9
  import re
10
- from typing import List, Dict, Set
11
 
12
- # Importa o extrator que criamos
13
- # HF Spaces irá executar isso da raiz, então o caminho 'scripts' está correto.
14
  from scripts.inference import EntityExtractor
15
 
# --- 1. Model Loading ---
# Load the model into memory only once, when the app starts.
# Make sure this identifier points to where HF Spaces will find the model.
MODEL_PATH = "feliponi/hirly-ner-multi"
try:
    extractor = EntityExtractor(MODEL_PATH)
    print(f"Modelo carregado com sucesso de {MODEL_PATH}")
except Exception as e:  # startup boundary: keep the module importable
    print(f"ERRO CRÍTICO: Não foi possível carregar o modelo de {MODEL_PATH}.")
    print("Certifique-se de que o modelo treinado está no diretório correto.")
    # Report the underlying cause; it was previously caught but never shown.
    print(f"Detalhes: {type(e).__name__}: {e}")
    # None acts as a sentinel: analyze_cv_and_jd checks it and returns an
    # error message instead of calling the extractor.
    extractor = None
28
 
29
- # --- 2. Lógica de Negócio (Nova) ---
 
30
 
def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Sum all 'EXPERIENCE_DURATION' entities into a total number of years.

    Each entity is expected to be a dict with an "entity" key (the span text)
    and a "label" key. Entities with any other label are ignored.

    Conversion examples:
        - "5+ years"  -> 5.0
        - "6 months"  -> 0.5
        - "3-5 anos"  -> 3.0 (the first number of a range is used)
        - "two years" -> 2.0

    Args:
        entities: Extracted entity dicts.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    num_words = {
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
    }
    # Whole-word patterns: plain substring checks misread spans such as
    # "extensive" (contains "ten") or "games" (contains "mes").
    digits_pattern = r"(\d+[\.,]\d+|\d+)"
    word_pattern = r"\b(" + "|".join(num_words) + r")\b"
    month_pattern = r"\b(?:months?|mes(?:es)?|mês)\b"

    total_experience = 0.0

    for entity in entities:
        if entity.get("label") != "EXPERIENCE_DURATION":
            continue
        text = str(entity.get("entity", "")).lower()

        # 1. Prefer digits ("5", "5.5", "2,5"); the first number wins.
        found_number = None
        digits = re.search(digits_pattern, text)
        if digits:
            found_number = float(digits.group(1).replace(",", "."))
        else:
            # 2. Fall back to spelled-out numbers, matched as whole words.
            spelled = re.search(word_pattern, text)
            if spelled:
                found_number = float(num_words[spelled.group(1)])

        if found_number is None:
            continue

        # 3. Months are converted to years; any other unit is assumed years.
        if re.search(month_pattern, text):
            total_experience += found_number / 12
        else:
            total_experience += found_number

    return round(total_experience, 1)
77
 
78
 
def analyze_cv_and_jd(cv_text: str, jd_text: str) -> (str, str, str, List[str], List[str]):
    """
    Main callback invoked by Gradio.

    Runs entity extraction on the resume and the job description, sums the
    experience durations found in each, and compares the SKILL entities.

    Args:
        cv_text: Raw resume text.
        jd_text: Raw job description text.

    Returns:
        A 5-tuple of: the Markdown match report, the resume experience string,
        the JD experience string, the sorted skills found only in the resume,
        and the sorted skills found only in the JD.
    """
    if not extractor:
        return "ERRO: Modelo não carregado.", "", "", [], []

    # 1. Process both texts.
    cv_entities = extractor.extract_entities_with_details(cv_text, confidence_threshold=0.7)
    jd_entities = extractor.extract_entities_with_details(jd_text, confidence_threshold=0.7)

    # 2. Sum the experience (business logic).
    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    # 3. Compare skills case-insensitively.
    cv_skills = {e["entity"].lower() for e in cv_entities if e["label"] == "SKILL"}
    jd_skills = {e["entity"].lower() for e in jd_entities if e["label"] == "SKILL"}

    matching_skills = cv_skills & jd_skills
    cv_only_skills = cv_skills - jd_skills
    jd_only_skills = jd_skills - cv_skills

    # 4. Format the output. Blank lines surround "---" so Markdown renders a
    # horizontal rule; a text line directly followed by "---" would instead
    # turn that text into a setext heading.
    if matching_skills:
        matched_line = ", ".join(sorted(matching_skills))
    else:
        matched_line = "No compatible skills found."

    match_output = (
        "## 🚀 Match Analysis\n\n"
        f"**Compatible Skills between Resume and Job Description: {len(matching_skills)}**\n\n"
        "---\n\n"
        f"{matched_line}\n"
    )

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (The extraction could list any requirement, like: '5+ years')"

    return (
        match_output,
        cv_exp_str,
        jd_exp_str,
        sorted(cv_only_skills),
        sorted(jd_only_skills),
    )
122
 
123
- # --- 3. Definição da Interface Gradio ---
124
 
# Top-level Gradio layout. Components are created in display order inside the
# Blocks context, so the statement order below defines the page layout.
with gr.Blocks(title="Hirly - Resume & Job Description Analysis") as demo:
    gr.Markdown("# 🚀 Resume & Job Description Analysis")
    gr.Markdown(
        "Enter a resume and a job description for analysis "
        "skills, experience to see they're compatible"
    )

    # Input row: resume on the left, job description on the right.
    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(lines=20, label="Resume Text")
        with gr.Column():
            jd_input = gr.Textbox(lines=20, label="Job Description Text (JD)")

    analyze_button = gr.Button("Execute Analysis", variant="primary")

    gr.Markdown("---")

    # Result row: Markdown match report beside the two experience totals.
    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Results")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(label="Total Experience from Resume", interactive=False)
            jd_exp_output = gr.Textbox(label="Total Experience required from JD", interactive=False)

    # NOTE(review): analyze_cv_and_jd returns the skills found ONLY in the
    # resume / ONLY in the JD for these two outputs; the labels do not say so.
    with gr.Row():
        cv_only_output = gr.JSON(label="Resume Skills")
        jd_only_output = gr.JSON(label="JD Skills")

    # Connect the button to the function; the outputs list order must match
    # the order of the tuple returned by analyze_cv_and_jd.
    analyze_button.click(
        fn=analyze_cv_and_jd,
        inputs=[cv_input, jd_input],
        outputs=[match_output, cv_exp_output, jd_exp_output, cv_only_output, jd_only_output]
    )
159
 
160
  if __name__ == "__main__":
 
1
  """
2
+ app.py (MULTI-LABEL V2 - English UI)
3
 
4
+ Gradio interface for the Entity Extraction Model
5
+ (SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION)
6
+ Loads the trained model and provides a UI to compare CV and JD.
7
  """
8
 
9
  import gradio as gr
10
  import re
11
+ from typing import List, Dict, Set, Tuple
12
 
13
+ # Import the extractor we already created
 
14
  from scripts.inference import EntityExtractor
15
 
# --- 1. Model Loading ---
# The extractor is built once at import time so every request reuses the same
# in-memory model. MODEL_PATH points to the locally trained model directory.
MODEL_PATH = "models/hirly_ner_multi"
try:
    extractor = EntityExtractor(MODEL_PATH)
    print(f"Model loaded successfully from {MODEL_PATH}")
except Exception as e:  # startup boundary: keep the module importable
    print(f"CRITICAL ERROR: Could not load model from {MODEL_PATH}.")
    print("Ensure the trained model is in the correct directory.")
    # Report the underlying cause; it was previously caught but never shown.
    print(f"Details: {type(e).__name__}: {e}")
    # None acts as a sentinel: analyze_cv_and_jd checks it and returns an
    # error message instead of calling the extractor.
    extractor = None
27
 
28
+ # --- 2. Business Logic (Unchanged) ---
29
+
30
 
def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Sum all 'EXPERIENCE_DURATION' entities into a total number of years.

    Each entity is expected to be a dict with an "entity" key (the span text)
    and a "label" key. Entities with any other label are ignored.

    Conversion examples:
        - "5+ years"  -> 5.0
        - "6 months"  -> 0.5
        - "3-5 anos"  -> 3.0 (the first number of a range is used)
        - "two years" -> 2.0

    Args:
        entities: Extracted entity dicts.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    num_words = {
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
    }
    # Whole-word patterns: plain substring checks misread spans such as
    # "extensive" (contains "ten") or "games" (contains "mes").
    digits_pattern = r"(\d+[\.,]\d+|\d+)"
    word_pattern = r"\b(" + "|".join(num_words) + r")\b"
    month_pattern = r"\b(?:months?|mes(?:es)?|mês)\b"

    total_experience = 0.0

    for entity in entities:
        if entity.get("label") != "EXPERIENCE_DURATION":
            continue
        text = str(entity.get("entity", "")).lower()

        # 1. Prefer digits ("5", "5.5", "2,5"); the first number wins.
        found_number = None
        digits = re.search(digits_pattern, text)
        if digits:
            found_number = float(digits.group(1).replace(",", "."))
        else:
            # 2. Fall back to spelled-out numbers, matched as whole words.
            spelled = re.search(word_pattern, text)
            if spelled:
                found_number = float(num_words[spelled.group(1)])

        if found_number is None:
            continue

        # 3. Months are converted to years; any other unit is assumed years.
        if re.search(month_pattern, text):
            total_experience += found_number / 12
        else:
            total_experience += found_number

    return round(total_experience, 1)
71
 
72
 
def extract_and_group_entities(
    text: str, confidence_threshold: float
) -> Dict[str, Set[str]]:
    """
    Run the extractor on *text* and bucket the results by entity label.

    Only the five known labels get a bucket; entities carrying any other
    label are dropped. Entity texts are lower-cased, and each bucket is a set,
    so repeated mentions collapse into a single entry.
    """
    known_labels = ("SKILL", "SOFT_SKILL", "LANG", "CERT", "EXPERIENCE_DURATION")
    buckets: Dict[str, Set[str]] = {name: set() for name in known_labels}

    for found in extractor.extract_entities_with_details(text, confidence_threshold):
        bucket = buckets.get(found.get("label"))
        if bucket is None:
            # Missing or unknown label: nothing to record.
            continue
        bucket.add(found["entity"].lower())

    return buckets
95
+
96
+
def _group_entities_by_label(
    entities: List[Dict], labels: List[str]
) -> Dict[str, Set[str]]:
    """Bucket lower-cased entity texts under each of *labels*; drop other labels."""
    grouped: Dict[str, Set[str]] = {label: set() for label in labels}
    for entity in entities:
        label = entity.get("label")
        if label in grouped:
            grouped[label].add(entity["entity"].lower())
    return grouped


def _format_match_section(label: str, matching: Set[str]) -> str:
    """Render one label's match summary as a self-contained Markdown section."""
    heading = f"**Matching {label.replace('_', ' ')}S: {len(matching)}**"
    if matching:
        items = f"_{', '.join(sorted(matching))}_"
    else:
        items = "_No matching items found._"
    # Blank lines around "---" make it a horizontal rule; a text line directly
    # followed by "---" would be rendered as a setext heading instead.
    return f"{heading}\n\n{items}\n\n---\n\n"


def analyze_cv_and_jd(cv_text: str, jd_text: str) -> Tuple[str, str, str, Dict, Dict]:
    """
    Main function called by Gradio.

    Extracts entities from the CV and the JD, sums the experience durations
    found in each, and reports which SKILL / SOFT_SKILL / LANG / CERT entities
    the two texts have in common.

    Args:
        cv_text: Raw resume text.
        jd_text: Raw job description text.

    Returns:
        A 5-tuple of: the Markdown match report, the CV experience string,
        the JD experience string, and two dicts mapping each non-empty label
        to its sorted entity list (CV first, then JD).
    """
    if extractor is None:
        return "ERROR: Model not loaded.", "", "", {}, {}

    labels_to_match = ["SKILL", "SOFT_SKILL", "LANG", "CERT"]
    confidence_threshold = 0.7

    # 1. Run the model once per text; the same entity list feeds both the
    # experience sum and the label grouping.
    cv_entities = extractor.extract_entities_with_details(cv_text, confidence_threshold)
    jd_entities = extractor.extract_entities_with_details(jd_text, confidence_threshold)

    # 2. Sum experience from the full entity lists, so repeated durations are
    # all counted rather than collapsed by a set.
    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    # 3. Group the comparable labels and build the match report.
    cv_groups = _group_entities_by_label(cv_entities, labels_to_match)
    jd_groups = _group_entities_by_label(jd_entities, labels_to_match)

    sections = [
        _format_match_section(label, cv_groups[label] & jd_groups[label])
        for label in labels_to_match
    ]
    match_output = "## 🚀 Match Analysis\n\n" + "".join(sections)

    # 4. JSON outputs: only labels that actually produced entities.
    cv_json_output = {k: sorted(v) for k, v in cv_groups.items() if v}
    jd_json_output = {k: sorted(v) for k, v in jd_groups.items() if v}

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"

    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)
 
 
 
 
 
 
144
 
 
145
 
146
+ # --- 3. Gradio Interface Definition (All English) ---
147
+
# Top-level Gradio layout. Components are created in display order inside the
# Blocks context, so the statement order below defines the page layout.
with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
    gr.Markdown("# 🚀 Resume vs. Job Description Analyzer")
    gr.Markdown(
        "Provide the text from a Resume (CV) and a Job Description (JD) to extract "
        "skills, soft skills, languages, certifications, years of experience, and see their compatibility."
    )

    # Input row: resume on the left, job description on the right.
    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(lines=20, label="Resume (CV) Text")
        with gr.Column():
            jd_input = gr.Textbox(lines=20, label="Job Description (JD) Text")

    analyze_button = gr.Button("Analyze Compatibility", variant="primary")

    gr.Markdown("---")

    # Result row: Markdown match report beside the two experience totals.
    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Analysis")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(label="Total Experience (CV)", interactive=False)
            jd_exp_output = gr.Textbox(label="Total Experience (JD)", interactive=False)

    # Per-label entity dicts for each document, shown as JSON.
    with gr.Row():
        cv_only_output = gr.JSON(label="Entities Found in CV")
        jd_only_output = gr.JSON(label="Entities Required by JD")

    # Connect button to function; the outputs list order must match the order
    # of the tuple returned by analyze_cv_and_jd.
    analyze_button.click(
        fn=analyze_cv_and_jd,
        inputs=[cv_input, jd_input],
        outputs=[
            match_output,
            cv_exp_output,
            jd_exp_output,
            cv_only_output,
            jd_only_output,
        ],
    )
188
 
189
  if __name__ == "__main__":