# cv-analyzer / app.py
# Author: Dragonanta67
# Commit 3928d34 (verified): "Atgriez uz versiju 2612aa5" (revert to version 2612aa5)
import gradio as gr
import PyPDF2
import docx
import re
from datetime import datetime
from transformers import pipeline
# Load the NER model once at import time; extract_name_with_ner() reads it
# through the module-level name `ner_model`.
print("🤖 Ielādē XLM-RoBERTa NER modeli...")
ner_model = pipeline(
    task="ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
print("✅ NER modelis gatavs!")
# Text extraction
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF, one page after another.

    Args:
        file_obj: Passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The page texts joined with newlines. A page that yields no text
        contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file_obj)
    # Join with a newline so the last word of one page cannot fuse with the
    # first word of the next (which would hide dates, e-mails and names from
    # the regexes run on this text). `or ''` keeps the join safe should a
    # page yield no text at all.
    return '\n'.join((page.extract_text() or '') for page in pdf_reader.pages)
def extract_text_from_docx(file_obj):
    """Return the text of the document's paragraphs, one paragraph per line.

    Args:
        file_obj: Passed straight to ``docx.Document``.
    """
    document = docx.Document(file_obj)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return '\n'.join(paragraph_texts)
def extract_text_from_txt(file_obj):
    """Read a plain-text CV and return its content as ``str``.

    Args:
        file_obj: Either an object with a ``read()`` method (binary or text
            mode) or a file-system path given as ``str``.
            NOTE(review): some Gradio versions hand the callback a path
            string instead of an open file -- confirm against the deployed
            version.

    Returns:
        The decoded text, without a leading UTF-8 byte-order mark.

    Raises:
        UnicodeDecodeError: If binary content is not valid UTF-8.
    """
    if isinstance(file_obj, str):
        with open(file_obj, 'rb') as handle:
            raw = handle.read()
    else:
        raw = file_obj.read()

    if isinstance(raw, str):
        # Text-mode stream: already decoded, only drop a stray BOM.
        return raw[1:] if raw.startswith('\ufeff') else raw

    # 'utf-8-sig' decodes plain UTF-8 and also strips a BOM, which would
    # otherwise sit in front of the first line and defeat '^'-anchored regexes.
    return raw.decode('utf-8-sig')
# Improved name extraction with NER
def extract_name_with_ner(text):
    """Return the candidate's name, or "Nav atrasts" when none is found.

    The NER model is tried first on the first 500 characters; a person
    entity is accepted when its score exceeds 0.7 and it has at least two
    words. If NER finds nothing -- or fails for any reason -- the first five
    lines are scanned for a "Firstname Lastname" pattern instead.

    Args:
        text: Full CV text.

    Returns:
        The detected name (the whole matching line for the regex fallback),
        or the string "Nav atrasts".
    """
    import logging

    if not isinstance(text, str) or not text:
        return "Nav atrasts"

    # NER is best-effort: a model failure must not prevent the regex
    # fallback below from running, so only this part is guarded.
    try:
        for entity in ner_model(text[:500]):
            if entity['entity_group'] == 'PER' and entity['score'] > 0.7:
                name = entity['word'].strip()
                if len(name.split()) >= 2:
                    return name
    except Exception:
        logging.getLogger(__name__).warning(
            "NER name extraction failed; falling back to regex", exc_info=True
        )

    # Fallback regex. `cv` is matched as a whole word so that a name which
    # merely contains the letters "cv" is not mistaken for a document heading.
    heading = re.compile(r'curriculum vitae|\bcv\b|resume|životopiss', re.IGNORECASE)
    full_name = re.compile(
        r'^[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+\s+[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+'
    )
    for line in text.split('\n')[:5]:
        line = line.strip()
        if heading.search(line):
            continue
        if full_name.match(line):
            return line
    return "Nav atrasts"
def extract_email(text):
    """Return the first e-mail address found in *text*, or "Nav atrasts".

    Args:
        text: Full CV text.
    """
    # The TLD class is [A-Za-z]; a '|' inside a character class is a literal
    # pipe, not alternation, and must not be accepted as part of the address.
    match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    return match.group(0) if match else "Nav atrasts"
def extract_phone(text):
    """Return the first phone-number-like sequence in *text*, or "Nav atrasts".

    A candidate starts with an optional '+', consists of digits separated by
    spaces, tabs or hyphens on a single line, and must contain at least
    8 digits. Candidates that look like a year range ("2015 - 2020") are
    skipped, since CVs are full of those.

    Args:
        text: Full CV text.
    """
    # Only spaces/tabs are allowed as separators (not '\s') so that a match
    # can never run across a line break.
    for match in re.finditer(r'\+?\d[\d \t-]{6,}\d', text):
        candidate = match.group(0)
        if sum(ch.isdigit() for ch in candidate) < 8:
            continue  # too short, e.g. "2015 - 03" from a date range
        if re.fullmatch(r'(?:19|20)\d{2}[ \t]*-[ \t]*(?:19|20)\d{2}', candidate):
            continue  # employment/education period, not a phone number
        return candidate
    return "Nav atrasts"
# Analysis functions
def _year_from_token(token, current_year):
    """Return the year in a "MM/YYYY", "MM.YYYY" or "YYYY" token; "present" keywords map to *current_year*."""
    if token.lower() in ('tagad', 'present', 'šobrīd'):
        return current_year
    # The year is whatever follows the last '/' or '.'; a bare year has neither.
    return int(re.split(r'[/.]', token)[-1])


def analyze_experience(text):
    """Estimate total work experience from the date ranges found in *text*.

    Two formats are recognised: "MM/YYYY - MM/YYYY" (also with '.', "to" and
    en/em dashes) and "YYYY - YYYY". The end of a range may be one of the
    keywords tagad / present / šobrīd. Durations are whole-year differences
    and are summed over all ranges.

    Args:
        text: Full CV text.

    Returns:
        A tuple ``(score, label)`` where ``score`` is 0-30 points and
        ``label`` is ``"<total> gadi"``.
    """
    current_year = datetime.now().year

    # Format 1: MM/YYYY to MM/YYYY or MM/YYYY - MM/YYYY
    pattern1 = r'(\d{1,2}[/.]\d{4})\s*(?:to\s+|-|–|—)\s*(\d{1,2}[/.]\d{4}|tagad|present|šobrīd)'
    # Format 2: YYYY-YYYY or YYYY - YYYY. The digit lookarounds stop the
    # pattern from cutting four digits out of a longer number.
    pattern2 = r'(?<!\d)(\d{4})\s*[-–—]\s*(\d{4}(?!\d)|tagad|present|šobrīd)'

    ranges = []
    month_year_spans = []
    for match in re.finditer(pattern1, text, re.IGNORECASE):
        ranges.append(match.groups())
        month_year_spans.append(match.span())

    for match in re.finditer(pattern2, text, re.IGNORECASE):
        # "01/2020 - present" also satisfies pattern2 ("2020 - present");
        # skip anything already covered by a month/year range so that it is
        # counted exactly once.
        if any(match.start() < stop and start < match.end()
               for start, stop in month_year_spans):
            continue
        ranges.append(match.groups())

    total_years = 0
    for start, end in ranges:
        start_year = _year_from_token(start, current_year)
        # A range that ends in the future only counts up to the present.
        end_year = min(_year_from_token(end, current_year), current_year)
        # Ignore implausible or reversed ranges (e.g. digit groups of a
        # phone number such as "2912-3456").
        if 1900 <= start_year <= end_year:
            total_years += end_year - start_year

    # Score by experience: (minimum years, points), highest tier first.
    exp_score = 0
    for min_years, points in ((6, 30), (4, 20), (3, 15), (2, 10), (1, 5)):
        if total_years >= min_years:
            exp_score = points
            break

    return exp_score, f"{total_years} gadi"
def analyze_education(text):
    """Return ``(score, level)`` for the highest education level mentioned.

    Levels are checked from highest to lowest; the first level with any
    keyword occurring (case-insensitively, as a substring) in *text* wins.
    When nothing matches the result is ``(0, "Nav norādīts")``.
    """
    haystack = text.lower()

    # (points, label, keywords) ordered from the highest level downwards.
    ranked_levels = (
        (30, 'Doktorantūra', ('phd', 'doktor', 'dr.')),
        (25, 'Maģistra grāds', ('maģistr', 'master')),
        (20, 'Bakalaura grāds', ('bakalaur', 'bachelor')),
        (15, 'Augstākā izglītība', ('universitāte', 'university', 'college', 'augstskola')),
        (10, 'Vidējā izglītība', ('vidusskola', 'high school', 'secondary')),
    )

    for points, label, keywords in ranked_levels:
        for keyword in keywords:
            if keyword in haystack:
                return points, label

    return 0, "Nav norādīts"
def analyze_skills(text):
    """Return ``(score, skills)`` for the known technical skills found in *text*.

    Each detected skill is worth 3 points, capped at 20. ``skills`` is the
    comma-separated list of detected skills, or "Nav atrasts" when empty.

    A skill only counts when it is not embedded in a longer ASCII word, so
    "excellent" is not Excel, "javascript" is not Java and "password" is not
    Word. Note that this also means "MySQL" no longer implies "sql".
    """
    technical_skills = ['python', 'java', 'javascript', 'c++', 'sql', 'machine learning',
                        'data analysis', 'excel', 'powerpoint', 'word', 'project management']
    haystack = text.lower()
    # Boundaries are ASCII letters only (not \b): that works for "c++", lets
    # "java8" through, and still matches inflected forms with a diacritic
    # ending such as "excelā".
    found_skills = [
        skill for skill in technical_skills
        if re.search(r'(?<![a-z])' + re.escape(skill) + r'(?![a-z])', haystack)
    ]
    return min(len(found_skills) * 3, 20), ', '.join(found_skills) or 'Nav atrasts'
def analyze_languages(text):
    """Return ``(score, languages)`` for the languages mentioned in *text*.

    A language counts when any of its stems occurs (case-insensitively, as a
    substring) in the text. Each language is worth 5 points, capped at 20.
    ``languages`` is a comma-separated list of the Latvian language names, or
    "Nav norādīts" when none is found.
    """
    stems_by_language = {
        'latviešu': ['latvie', 'latvian'],
        'angļu': ['angļ', 'english'],
        'krievu': ['kriev', 'russian'],
        'vācu': ['vāc', 'german', 'deutsch'],
        'franču': ['franč', 'french', 'français'],
        'spāņu': ['spāņ', 'spanish', 'español'],
    }

    lowered = text.lower()
    detected = []
    for language, stems in stems_by_language.items():
        for stem in stems:
            if stem in lowered:
                detected.append(language)
                break

    score = min(5 * len(detected), 20)
    return score, ', '.join(detected) or 'Nav norādīts'
# Generate a description of the candidate
def generate_candidate_description(name, experience, education, skills, languages):
    """Build a short Latvian summary sentence from the extracted CV fields.

    Fields carrying their "not found" placeholder ("Nav atrasts" /
    "Nav norādīts") are left out, and so is an experience value of zero (or
    fewer) years, which means no date range was detected.

    Args:
        name: Candidate name or "Nav atrasts".
        experience: Label such as "5 gadi".
        education: Education level label or "Nav norādīts".
        skills: Comma-separated skills or "Nav atrasts".
        languages: Comma-separated languages or "Nav norādīts".

    Returns:
        The summary, always terminated by a full stop.
    """
    # Start with the name
    description = "Kandidāts" if name == "Nav atrasts" else name

    # Add education
    if education and education != "Nav norādīts":
        description += f" ar {education.lower()}"

    # Add experience -- but do not claim "0 gadi" of experience when nothing
    # was detected.
    if experience and "gadi" in experience:
        leading_number = re.match(r'\s*(-?\d+)', experience)
        if leading_number is None or int(leading_number.group(1)) > 0:
            description += f", kam ir {experience} darba pieredze"

    # Add skills; only the first three are spelled out
    if skills and skills != "Nav atrasts":
        skills_list = skills.split(", ")
        if len(skills_list) > 3:
            main_skills = ", ".join(skills_list[:3])
            description += f". Pārzina {main_skills} un citas tehnoloģijas"
        else:
            description += f". Pārzina {skills}"

    # Add languages
    if languages and languages != "Nav norādīts":
        description += f". Runā {languages} valodās"

    description += "."
    return description
# Main CV analysis function
def analyze_cv(file):
    """Analyse an uploaded CV and return a formatted report string.

    The file type is taken from the extension of ``file.name`` (pdf, docx or
    txt). The extracted text is run through the name/e-mail/phone extractors
    and the four scoring functions, whose scores are summed into a total out
    of 100.

    Args:
        file: The uploaded file, or ``None`` when nothing was uploaded. It
            must expose a ``name`` attribute and is passed unchanged to the
            matching ``extract_text_from_*`` function.

    Returns:
        The report text, or a warning/error message. Any exception raised
        while processing is caught and reported in the returned message.
    """
    if file is None:
        return "⚠️ Lūdzu, augšupielādējiet CV failu!"

    try:
        extension = file.name.split('.')[-1].lower()
        if extension not in ('pdf', 'docx', 'txt'):
            return "❌ Neatbalstīts faila formāts! Atbalstītie: PDF, DOCX, TXT"

        if extension == 'pdf':
            text = extract_text_from_pdf(file)
        elif extension == 'docx':
            text = extract_text_from_docx(file)
        else:
            text = extract_text_from_txt(file)

        # Contact details
        name = extract_name_with_ner(text)
        email = extract_email(text)
        phone = extract_phone(text)

        # Scored categories
        exp_score, experience = analyze_experience(text)
        edu_score, education = analyze_education(text)
        skill_score, skills = analyze_skills(text)
        lang_score, languages = analyze_languages(text)
        total = sum((exp_score, edu_score, skill_score, lang_score))

        # Generate the candidate description
        candidate_description = generate_candidate_description(
            name, experience, education, skills, languages
        )

        return f"""📊 REZULTĀTI: {total}/100
────────────────────────────
👤 {name} | 📧 {email} | 📱 {phone}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━
📝 {candidate_description}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━────────────────────────────
💼 Pieredze: {exp_score}/30 ({experience})
🎓 Izglītība: {edu_score}/30 ({education})
💻 Prasmes: {skill_score}/20 ({skills})
🌐 Valodas: {lang_score}/20 ({languages})
"""
    except Exception as e:
        return f"❌ Kļūda apstrādājot failu: {str(e)}"
# Gradio interface: one file upload in, one text box with the report out.
_cv_upload = gr.File(label="Ielādējiet CV failu", file_types=['.pdf', '.docx', '.txt'])
_results_box = gr.Textbox(label="Analīzes rezultāti", lines=25)

demo = gr.Interface(
    fn=analyze_cv,
    inputs=_cv_upload,
    outputs=_results_box,
    title="📄 CV Automatīskās Analīzes Sistēma",
    description="""Augšupielādējiet CV failu (PDF, DOCX vai TXT), un sistēma automatīski analizēs:
- 👤 Personīgo informāciju
- 💼 Darba pieredzi
- 🎓 Izglītību
- 🌐 Valodu prasmes
- 📚 Tehniskās prasmes
**Rezultāti tiek vērtēti 100 punktu skalā**
""",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()