Spaces:
Sleeping
Sleeping
File size: 8,699 Bytes
0aa4cf0 288a757 c2a4a9d 2f104bc 0aa4cf0 288a757 0aa4cf0 288a757 0aa4cf0 288a757 2f104bc 92f02a4 2f104bc 92f02a4 2f104bc 92f02a4 288a757 92f02a4 288a757 92f02a4 f50ef64 288a757 3928d34 2612aa5 69cfc08 3928d34 69cfc08 a7f8a02 69cfc08 32f2578 288a757 f50ef64 288a757 92f02a4 288a757 f287b86 92f02a4 288a757 f50ef64 92f02a4 f50ef64 288a757 92f02a4 288a757 92f02a4 288a757 92f02a4 288a757 92f02a4 288a757 33e96f0 f50ef64 d03b3c6 92f02a4 f287b86 92f02a4 d03b3c6 92f02a4 f287b86 92f02a4 d03b3c6 33e96f0 92f02a4 d03b3c6 92f02a4 d03b3c6 92f02a4 d03b3c6 92f02a4 d03b3c6 92f02a4 d03b3c6 2f104bc c2a4a9d 288a757 92f02a4 c2a4a9d 288a757 92f02a4 288a757 92f02a4 2f104bc 288a757 92f02a4 288a757 f50ef64 288a757 92f02a4 288a757 92f02a4 d03b3c6 f50ef64 92f02a4 bdad7e7 6ecd3d8 92f02a4 bdad7e7 288a757 bdad7e7 6ecd3d8 92f02a4 6ecd3d8 bdad7e7 92f02a4 288a757 6ecd3d8 bdad7e7 33e96f0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | import gradio as gr
import PyPDF2
import docx
import re
from datetime import datetime
from transformers import pipeline
# Load the NER model once at import time; extract_name_with_ner reuses this
# module-level pipeline on every call instead of re-creating it.
print("🤖 Ielādē XLM-RoBERTa NER modeli...")
# aggregation_strategy="simple" is requested here; extract_name_with_ner reads
# the `entity_group`, `word` and `score` keys of each returned entity.
ner_model = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
print("✅ NER modelis gatavs!")
# Teksta ekstrakcija
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF, one page after another.

    Args:
        file_obj: Anything ``PyPDF2.PdfReader`` accepts (a path or a binary
            file-like object).

    Returns:
        str: The extracted page texts joined with newlines.
    """
    pdf_reader = PyPDF2.PdfReader(file_obj)
    # Pages are joined with a newline rather than an empty string so the last
    # word of one page cannot fuse with the first word of the next (which
    # would corrupt e-mail addresses, dates and the line-based name lookup).
    # `or ''` keeps the join safe if a page yields no text at all.
    return '\n'.join((page.extract_text() or '') for page in pdf_reader.pages)
def extract_text_from_docx(file_obj):
    """Return the text of a DOCX document, one paragraph per line."""
    document = docx.Document(file_obj)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return '\n'.join(paragraph_texts)
def extract_text_from_txt(file_obj):
    """Return the contents of a plain-text CV as a string.

    Args:
        file_obj: Either a file-like object exposing ``read()`` (binary or
            text mode) or a filesystem path to the file.

    Returns:
        str: The decoded text. Bytes are decoded as UTF-8 and a leading
        byte-order mark, if present, is dropped.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
        OSError: If a path was given and the file cannot be opened.
    """
    if hasattr(file_obj, 'read'):
        raw = file_obj.read()
    else:
        # No read() method: treat the argument as a path and read the bytes
        # ourselves, closing the handle deterministically.
        with open(file_obj, 'rb') as handle:
            raw = handle.read()

    # A stream opened in text mode already hands back str.
    if isinstance(raw, str):
        return raw
    # 'utf-8-sig' behaves like 'utf-8' but strips a leading BOM, which would
    # otherwise sit in front of the first line and defeat '^'-anchored regexes.
    return raw.decode('utf-8-sig')
# UZLABOTA vārda ekstrakcija ar NER
def extract_name_with_ner(text):
    """Extract the candidate's full name from CV text.

    Two stages are tried in order:

    1. The module-level ``ner_model`` is run on the first 500 characters and
       the first ``PER`` entity with score above 0.7 and at least two words
       is returned.
    2. If that yields nothing (or the model fails), the first five lines are
       scanned for a line starting with two capitalised words, skipping
       heading lines such as "Curriculum Vitae" / "CV" / "Resume".

    Args:
        text: Raw CV text.

    Returns:
        str: The detected name, or ``"Nav atrasts"`` when none is found.
    """
    if not text:
        return "Nav atrasts"

    # Stage 1: NER. This stage is best-effort, so a failure is reported and
    # the regex stage below still runs instead of giving up entirely.
    try:
        for entity in ner_model(text[:500]):
            if entity['entity_group'] == 'PER' and entity['score'] > 0.7:
                name = entity['word'].strip()
                if len(name.split()) >= 2:
                    return name
    except Exception as exc:
        print(f"⚠️ NER kļūda, izmanto regex: {exc}")

    # Stage 2: regex fallback over the first few lines.
    # Word boundaries keep the heading filter from rejecting names that merely
    # contain "cv" (e.g. "Cvetkovs").
    heading = re.compile(r'\b(?:curriculum vitae|cv|resume|životopiss)\b', re.IGNORECASE)
    name_line = re.compile(
        r'^[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+\s+[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+'
    )
    for line in text.split('\n')[:5]:
        line = line.strip()
        if heading.search(line):
            continue
        if name_line.match(line):
            return line
    return "Nav atrasts"
def extract_email(text):
    """Return the first e-mail address found in *text*.

    Args:
        text: Raw CV text.

    Returns:
        str: The matched address, or ``"Nav atrasts"`` when there is none.
    """
    # The TLD class is [A-Za-z]; a '|' inside a character class is a literal
    # pipe, not alternation, so it must not appear there.
    match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    return match.group(0) if match else "Nav atrasts"
def extract_phone(text):
    """Return the first phone-number-like run in *text*.

    A match is an optional ``+``, a digit, at least seven digits, whitespace
    characters or hyphens, and a closing digit. Returns ``"Nav atrasts"``
    when nothing matches.
    """
    phone_pattern = re.compile(r'\+?\d[\d\s-]{7,}\d')
    found = phone_pattern.search(text)
    if found is None:
        return "Nav atrasts"
    return found.group(0).strip()
# Analīzes funkcijas
def analyze_experience(text):
    """Estimate total years of experience from date ranges in *text*.

    Two notations are recognised, case-insensitively:

    * ``MM/YYYY - MM/YYYY`` (``/`` or ``.`` inside the date; ``to``, ``-``,
      ``–`` or ``—`` between the dates)
    * ``YYYY - YYYY`` (``-``, ``–`` or ``—`` between the years)

    In both, the end may be ``tagad``, ``present`` or ``šobrīd``, meaning the
    current year. Only the year parts are used; each range contributes
    ``end_year - start_year``.

    Ranges are ignored when the start year is before 1900 or after the end
    year; end years in the future are clamped to the current year. This keeps
    phone-number fragments such as ``2612-3456`` from counting as centuries
    of experience.

    Args:
        text: Raw CV text.

    Returns:
        tuple[int, str]: ``(score, label)`` where ``score`` is 0-30 and
        ``label`` is ``"<total> gadi"``.
    """
    present_words = ('tagad', 'present', 'šobrīd')
    current_year = datetime.now().year

    month_year_range = r'(\d{1,2}[/.]\d{4})\s*(?:to\s+|-|–|—)\s*(\d{1,2}[/.]\d{4}|tagad|present|šobrīd)'
    # Digit guards stop the year pattern from matching inside longer numbers.
    year_range = r'(?<!\d)(\d{4})\s*[-–—]\s*(\d{4}(?!\d)|tagad|present|šobrīd)'

    month_matches = re.findall(month_year_range, text, re.IGNORECASE)
    # Blank out the MM/YYYY ranges before looking for bare-year ranges;
    # otherwise "01/2015 - present" would also match as "2015 - present" and
    # be counted twice.
    remaining = re.sub(month_year_range, ' | ', text, flags=re.IGNORECASE)
    year_matches = re.findall(year_range, remaining, re.IGNORECASE)

    def span_years(start, end):
        """Return the whole-year length of one range, or 0 if implausible."""
        start_year = int(re.split(r'[/.]', start)[-1])
        if end.lower() in present_words:
            end_year = current_year
        else:
            end_year = min(int(re.split(r'[/.]', end)[-1]), current_year)
        if start_year < 1900 or start_year > end_year:
            return 0
        return end_year - start_year

    total_years = sum(span_years(start, end) for start, end in month_matches + year_matches)

    # (minimum years, points), checked from the highest tier down.
    score_table = ((6, 30), (4, 20), (3, 15), (2, 10), (1, 5))
    exp_score = next(
        (points for min_years, points in score_table if total_years >= min_years),
        0,
    )
    return exp_score, f"{total_years} gadi"
def analyze_education(text):
    """Score the highest education level mentioned in *text*.

    Levels are checked from highest to lowest and the first one with a
    matching keyword wins. A keyword matches when it occurs at the start of a
    word (case-insensitively), so stems such as ``maģistr`` still cover
    inflected forms while ``dr.`` no longer matches inside ``adr.``.

    Args:
        text: Raw CV text.

    Returns:
        tuple[int, str]: ``(score, level_name)``; ``(0, "Nav norādīts")`` when
        no keyword is found.
    """
    text_lower = text.lower()
    education_levels = [
        (['phd', 'doktor', 'dr.'], 30, 'Doktorantūra'),
        (['maģistr', 'master'], 25, 'Maģistra grāds'),
        (['bakalaur', 'bachelor'], 20, 'Bakalaura grāds'),
        (['universitāte', 'university', 'college', 'augstskola'], 15, 'Augstākā izglītība'),
        (['vidusskola', 'high school', 'secondary'], 10, 'Vidējā izglītība')
    ]
    for keywords, score, level in education_levels:
        # \b anchors the keyword to a word start; re.escape keeps the '.' in
        # 'dr.' literal.
        if any(re.search(r'\b' + re.escape(word), text_lower) for word in keywords):
            return score, level
    return 0, "Nav norādīts"
def analyze_skills(text):
    """Score the technical skills mentioned in *text*.

    Each skill from a fixed list counts when it appears as a standalone term
    (case-insensitively), i.e. not directly preceded or followed by a letter.
    This avoids false hits such as "excellent" -> excel, "javascript" -> java
    or "password" -> word, while still accepting "Python3" or "C++".

    Args:
        text: Raw CV text.

    Returns:
        tuple[int, str]: ``(score, skills)`` where ``score`` is 3 points per
        skill capped at 20 and ``skills`` is a comma-separated list, or
        ``'Nav atrasts'`` when none is found.
    """
    technical_skills = ['python', 'java', 'javascript', 'c++', 'sql', 'machine learning',
                        'data analysis', 'excel', 'powerpoint', 'word', 'project management']
    text_lower = text.lower()
    # [^\W\d_] is "any letter"; the lookarounds forbid a letter on either side
    # of the skill. \b is not used because it does not work around 'c++'.
    found_skills = [
        skill for skill in technical_skills
        if re.search(r'(?<![^\W\d_])' + re.escape(skill) + r'(?![^\W\d_])', text_lower)
    ]
    return min(len(found_skills) * 3, 20), ', '.join(found_skills) or 'Nav atrasts'
def analyze_languages(text):
    """Score the spoken languages mentioned in *text*.

    Each language is detected by a case-insensitive substring search for any
    of its stems. Returns ``(score, languages)``: 5 points per language
    capped at 20, and a comma-separated list of the Latvian language names
    (or ``'Nav norādīts'`` when none is detected).
    """
    languages = {
        'latviešu': ['latvie', 'latvian'],
        'angļu': ['angļ', 'english'],
        'krievu': ['kriev', 'russian'],
        'vācu': ['vāc', 'german', 'deutsch'],
        'franču': ['franč', 'french', 'français'],
        'spāņu': ['spāņ', 'spanish', 'español']
    }
    haystack = text.lower()
    found = []
    for lang, patterns in languages.items():
        for stem in patterns:
            if stem in haystack:
                found.append(lang)
                break
    score = min(len(found) * 5, 20)
    label = ', '.join(found)
    if not label:
        label = 'Nav norādīts'
    return score, label
# Ģenerē aprakstu par kandidātu
def generate_candidate_description(name, experience, education, skills, languages):
    """Build a one-paragraph summary of the candidate from the CV findings.

    The summary starts with the name (or "Kandidāts" when the name is the
    "Nav atrasts" placeholder) and appends a clause for education,
    experience, skills and languages whenever the corresponding value is
    present and not a placeholder. At most the first three skills are listed.
    """
    pieces = ["Kandidāts" if name == "Nav atrasts" else name]

    # Education
    if education and education != "Nav norādīts":
        pieces.append(f" ar {education.lower()}")

    # Experience
    if experience and "gadi" in experience:
        pieces.append(f", kam ir {experience} darba pieredze")

    # Skills: name only the first three when there are more
    if skills and skills != "Nav atrasts":
        skills_list = skills.split(", ")
        if len(skills_list) <= 3:
            pieces.append(f". Pārzina {skills}")
        else:
            main_skills = ", ".join(skills_list[:3])
            pieces.append(f". Pārzina {main_skills} un citas tehnoloģijas")

    # Languages
    if languages and languages != "Nav norādīts":
        pieces.append(f". Runā {languages} valodās")

    pieces.append(".")
    return "".join(pieces)
# Galvenā CV analīzes funkcija
def analyze_cv(file):
    """Analyse an uploaded CV file and return a formatted text report.

    The extension of ``file.name`` selects the text extractor (PDF, DOCX or
    TXT). The extracted text is then run through the contact, experience,
    education, skills and language analysers, and their scores are summed.

    Returns the report string, or a user-facing message when no file was
    given, the extension is unsupported, or any step raises.
    """
    if file is None:
        return "⚠️ Lūdzu, augšupielādējiet CV failu!"
    try:
        extension = file.name.split('.')[-1].lower()
        extractors = {
            'pdf': extract_text_from_pdf,
            'docx': extract_text_from_docx,
            'txt': extract_text_from_txt,
        }
        if extension not in extractors:
            return "❌ Neatbalstīts faila formāts! Atbalstītie: PDF, DOCX, TXT"
        text = extractors[extension](file)

        # Contact details
        name = extract_name_with_ner(text)
        email = extract_email(text)
        phone = extract_phone(text)

        # Scored sections
        exp_score, experience = analyze_experience(text)
        edu_score, education = analyze_education(text)
        skill_score, skills = analyze_skills(text)
        lang_score, languages = analyze_languages(text)
        total = sum((exp_score, edu_score, skill_score, lang_score))

        # Short narrative summary of the candidate
        candidate_description = generate_candidate_description(
            name, experience, education, skills, languages
        )
        return f"""📊 REZULTĀTI: {total}/100
────────────────────────────
👤 {name} | 📧 {email} | 📱 {phone}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━
📝 {candidate_description}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━────────────────────────────
💼 Pieredze: {exp_score}/30 ({experience})
🎓 Izglītība: {edu_score}/30 ({education})
💻 Prasmes: {skill_score}/20 ({skills})
🌐 Valodas: {lang_score}/20 ({languages})
"""
    except Exception as e:
        return f"❌ Kļūda apstrādājot failu: {str(e)}"
# Gradio interfeiss
# Gradio UI: one file upload wired to analyze_cv, with the report shown in a
# text box.
demo = gr.Interface(
    fn=analyze_cv,
    inputs=gr.File(label="Ielādējiet CV failu", file_types=['.pdf', '.docx', '.txt']),
    outputs=gr.Textbox(label="Analīzes rezultāti", lines=25),
    title="📄 CV Automatīskās Analīzes Sistēma",
    description="""Augšupielādējiet CV failu (PDF, DOCX vai TXT), un sistēma automatīski analizēs:
- 👤 Personīgo informāciju
- 💼 Darba pieredzi
- 🎓 Izglītību
- 🌐 Valodu prasmes
- 📚 Tehniskās prasmes
**Rezultāti tiek vērtēti 100 punktu skalā**
"""
)

# Launch the app only when the file is executed as a script. The stray
# trailing "|" after demo.launch() was page-scrape residue and is removed so
# the file parses.
if __name__ == "__main__":
    demo.launch()