"""Gradio app that extracts text from an uploaded CV and scores it heuristically.

The CV (PDF, DOCX or TXT) is converted to plain text, contact details are
pulled out with a NER model plus regular expressions, and four keyword/regex
based scores (experience, education, skills, languages) are summed into a
100-point total.
"""

import re
from datetime import datetime

import docx
import gradio as gr
import PyPDF2
from transformers import pipeline

# Placeholder strings shown to the user when a field cannot be determined.
_NOT_FOUND = "Nav atrasts"
_NOT_SPECIFIED = "Nav norādīts"

# Words that mark an open-ended ("until now") date range.
_PRESENT_WORDS = ("tagad", "present", "šobrīd")

# Lines that are document headings rather than a person's name.
_CV_HEADING_RE = re.compile(
    r"curriculum vitae|\bcv\b|resume|životopiss", re.IGNORECASE
)
# Two capitalised words (Latin + Latvian letters) at the start of a line.
_NAME_LINE_RE = re.compile(
    r"^[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+\s+[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+"
)

_EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")

# Digits separated by hyphens or same-line whitespace; never spans a newline.
_PHONE_RE = re.compile(r"\+?\d(?:[\d-]|[^\S\r\n]){7,}\d")
# A phone candidate that is really a "YYYY - YYYY" date range.
_BARE_YEAR_RANGE_RE = re.compile(r"(?:19|20)\d{2}\s*-\s*(?:19|20)\d{2}")

# Format 1: "MM/YYYY to MM/YYYY", "MM.YYYY - MM.YYYY", "MM/YYYY - present".
_MONTH_RANGE_RE = re.compile(
    r"(\d{1,2}[/.]\d{4})\s*(?:to\s+|-|–|—)\s*(\d{1,2}[/.]\d{4}|tagad|present|šobrīd)",
    re.IGNORECASE,
)
# Format 2: "YYYY-YYYY", "YYYY - present". The digit guards keep the pattern
# from matching four digits taken out of the middle of a longer number.
_YEAR_RANGE_RE = re.compile(
    r"(?<!\d)(\d{4})\s*[-–—]\s*(\d{4}(?!\d)|tagad|present|šobrīd)",
    re.IGNORECASE,
)
_MIN_PLAUSIBLE_YEAR = 1900

# (minimum total years, score), checked from the highest tier down.
_EXPERIENCE_TIERS = ((6, 30), (4, 20), (3, 15), (2, 10), (1, 5))

# (keywords, score, label), checked from the highest level down.
_EDUCATION_LEVELS = (
    (("phd", "doktor", "dr."), 30, "Doktorantūra"),
    (("maģistr", "master"), 25, "Maģistra grāds"),
    (("bakalaur", "bachelor"), 20, "Bakalaura grāds"),
    (("universitāte", "university", "college", "augstskola"), 15, "Augstākā izglītība"),
    (("vidusskola", "high school", "secondary"), 10, "Vidējā izglītība"),
)

_TECHNICAL_SKILLS = (
    "python", "java", "javascript", "c++", "sql", "machine learning",
    "data analysis", "excel", "powerpoint", "word", "project management",
)
# SQL dialect names such as MySQL / PostgreSQL still count as SQL.
_SKILL_PATTERN_OVERRIDES = {"sql": r"\w*sql"}

# Display name -> word stems (Latvian and native/English spellings).
_LANGUAGE_STEMS = {
    "latviešu": ("latvie", "latvian"),
    "angļu": ("angļ", "english"),
    "krievu": ("kriev", "russian"),
    "vācu": ("vāc", "german", "deutsch"),
    "franču": ("franč", "french", "français"),
    "spāņu": ("spāņ", "spanish", "español"),
}

# Load the NER model once at import time; extract_name_with_ner reuses it.
print("🤖 Ielādē XLM-RoBERTa NER modeli...")
ner_model = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
print("✅ NER modelis gatavs!")


# --- Text extraction -------------------------------------------------------

def extract_text_from_pdf(file_obj):
    """Return the text of all pages of a PDF, pages separated by newlines.

    Args:
        file_obj: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        The concatenated page text; pages that yield no text contribute an
        empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file_obj)
    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next, which would break the regex heuristics.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_docx(file_obj):
    """Return the paragraph texts of a DOCX document joined by newlines.

    Args:
        file_obj: Whatever ``docx.Document`` accepts as its source.
    """
    document = docx.Document(file_obj)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def extract_text_from_txt(file_obj):
    """Return the content of a plain-text file as ``str``.

    Args:
        file_obj: Either an object with a ``read()`` method or a filesystem
            path. Both are supported because the upload may arrive in either
            form depending on how the ``gr.File`` input is configured.

    Returns:
        The decoded text. Bytes are decoded as UTF-8; a leading BOM is dropped.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        with open(file_obj, "rb") as handle:
            data = handle.read()
    if isinstance(data, bytes):
        # "utf-8-sig" strips a BOM, which would otherwise sit in front of the
        # first line and defeat the "^"-anchored name pattern.
        return data.decode("utf-8-sig")
    return data


# --- Contact details -------------------------------------------------------

def extract_name_with_ner(text):
    """Return the candidate's name, or "Nav atrasts" if none is found.

    The NER model is tried first on the first 500 characters; a person entity
    with a score above 0.7 and at least two words wins. Otherwise the first
    five lines are scanned for a line starting with two capitalised words,
    skipping lines that look like a CV heading.

    Args:
        text: The full CV text.
    """
    try:
        entities = ner_model(text[:500])
    except Exception:
        # Best effort: a model failure must not prevent the regex fallback.
        entities = []

    for entity in entities:
        if entity.get("entity_group") == "PER" and entity.get("score", 0) > 0.7:
            name = entity.get("word", "").strip()
            if len(name.split()) >= 2:
                return name

    # Regex fallback over the first few lines.
    for raw_line in text.split("\n")[:5]:
        line = raw_line.strip()
        if _CV_HEADING_RE.search(line):
            continue
        if _NAME_LINE_RE.match(line):
            return line
    return _NOT_FOUND


def extract_email(text):
    """Return the first e-mail address in *text*, or "Nav atrasts"."""
    match = _EMAIL_RE.search(text)
    return match.group(0) if match else _NOT_FOUND


def extract_phone(text):
    """Return the first phone-like number in *text*, or "Nav atrasts".

    A candidate is an optional "+" followed by at least nine characters of
    digits, hyphens and same-line whitespace, starting and ending on a digit.
    Candidates that are exactly a "YYYY - YYYY" range are skipped because the
    same shape is used for employment and study periods.
    """
    for match in _PHONE_RE.finditer(text):
        candidate = match.group(0).strip()
        if _BARE_YEAR_RANGE_RE.fullmatch(candidate):
            continue
        return candidate
    return _NOT_FOUND


# --- Scoring ---------------------------------------------------------------

def _range_years(start, end, current_year):
    """Return the whole-year length of one date range, or 0 if implausible.

    *start* and *end* are regex captures whose last four characters are the
    year, except that *end* may be one of the "present" words.
    """
    start_year = int(start[-4:])
    if end.lower() in _PRESENT_WORDS:
        end_year = current_year
    else:
        end_year = int(end[-4:])
    # Reject reversed ranges and digit groups that are not real years
    # (e.g. fragments of phone numbers); ranges ending in the future are
    # ignored as well.
    if not _MIN_PLAUSIBLE_YEAR <= start_year <= end_year <= current_year:
        return 0
    return end_year - start_year


def analyze_experience(text):
    """Score work experience from the date ranges found in *text*.

    Both "MM/YYYY - MM/YYYY" and "YYYY - YYYY" style ranges (with "tagad",
    "present" or "šobrīd" as an open end) are recognised, and their lengths in
    whole years are summed.

    Args:
        text: The full CV text.

    Returns:
        A ``(score, label)`` tuple: score is 0-30, label is "<N> gadi".
    """
    current_year = datetime.now().year

    month_ranges = _MONTH_RANGE_RE.findall(text)
    # Remove the month-style ranges before the year-only pass; otherwise
    # "01/2015 - present" would be counted a second time as "2015 - present".
    remaining_text = _MONTH_RANGE_RE.sub(" ", text)
    year_ranges = _YEAR_RANGE_RE.findall(remaining_text)

    total_years = sum(
        _range_years(start, end, current_year)
        for start, end in month_ranges + year_ranges
    )

    exp_score = next(
        (score for min_years, score in _EXPERIENCE_TIERS if total_years >= min_years),
        0,
    )
    return exp_score, f"{total_years} gadi"


def _has_word_starting_with(haystack, stem):
    """Return True if some word in *haystack* begins with *stem*."""
    return re.search(r"(?<!\w)" + re.escape(stem), haystack) is not None


def analyze_education(text):
    """Score the highest education level mentioned in *text*.

    Levels are checked from doctorate down to secondary school; the first
    level with a matching keyword wins. Keywords must start a word, so that
    e.g. "dr." does not fire on "addr.".

    Returns:
        A ``(score, label)`` tuple; ``(0, "Nav norādīts")`` if nothing matches.
    """
    text_lower = text.lower()
    for keywords, score, level in _EDUCATION_LEVELS:
        if any(_has_word_starting_with(text_lower, keyword) for keyword in keywords):
            return score, level
    return 0, _NOT_SPECIFIED


def _skill_regex(skill):
    """Build the whole-token pattern used to detect *skill*."""
    core = _SKILL_PATTERN_OVERRIDES.get(skill, re.escape(skill))
    # Lookarounds instead of \b so that "c++" (ending in a non-word
    # character) is handled the same way as alphabetic skills.
    return re.compile(r"(?<!\w)" + core + r"(?!\w)")


def analyze_skills(text):
    """Score the technical skills mentioned in *text*.

    Each skill must appear as a whole token, so "javascript" does not also
    count as "java" and "excellent" does not count as "excel".

    Returns:
        A ``(score, label)`` tuple: 3 points per skill capped at 20, and the
        comma-separated skills found (or "Nav atrasts").
    """
    text_lower = text.lower()
    found_skills = [
        skill for skill in _TECHNICAL_SKILLS if _skill_regex(skill).search(text_lower)
    ]
    return min(len(found_skills) * 3, 20), ", ".join(found_skills) or _NOT_FOUND


def analyze_languages(text):
    """Score the spoken languages mentioned in *text*.

    A language is detected when a word starts with one of its stems.

    Returns:
        A ``(score, label)`` tuple: 5 points per language capped at 20, and
        the comma-separated Latvian language names (or "Nav norādīts").
    """
    text_lower = text.lower()
    found = [
        language
        for language, stems in _LANGUAGE_STEMS.items()
        if any(_has_word_starting_with(text_lower, stem) for stem in stems)
    ]
    return min(len(found) * 5, 20), ", ".join(found) or _NOT_SPECIFIED


# --- Candidate summary -----------------------------------------------------

def generate_candidate_description(name, experience, education, skills, languages):
    """Build a one-paragraph Latvian summary of the candidate from CV data.

    Args:
        name: Candidate name, or "Nav atrasts".
        experience: Label such as "5 gadi" from ``analyze_experience``.
        education: Education label, or "Nav norādīts".
        skills: Comma-separated skills, or "Nav atrasts".
        languages: Comma-separated languages, or "Nav norādīts".

    Returns:
        The summary sentence(s), always ending with a period.
    """
    # Start with the name.
    description = "Kandidāts" if name == _NOT_FOUND else name

    # Add education.
    if education and education != _NOT_SPECIFIED:
        description += f" ar {education.lower()}"

    # Add experience, but do not advertise zero years of it.
    if experience and "gadi" in experience and not experience.startswith("0 "):
        description += f", kam ir {experience} darba pieredze"

    # Add skills; only the first three are listed when there are more.
    if skills and skills != _NOT_FOUND:
        skills_list = skills.split(", ")
        if len(skills_list) > 3:
            main_skills = ", ".join(skills_list[:3])
            description += f". Pārzina {main_skills} un citas tehnoloģijas"
        else:
            description += f". Pārzina {skills}"

    # Add languages.
    if languages and languages != _NOT_SPECIFIED:
        description += f". Runā {languages} valodās"

    return description + "."


# --- Main entry point ------------------------------------------------------

def analyze_cv(file):
    """Analyse an uploaded CV and return a formatted text report.

    Args:
        file: The value delivered by the ``gr.File`` input: ``None`` when
            nothing was uploaded, otherwise a path string or an object with a
            ``name`` attribute holding the path.

    Returns:
        The report text, or a user-facing warning/error message. Exceptions
        raised during processing are reported in the returned string rather
        than propagated, so the UI always gets something to display.
    """
    if file is None:
        return "⚠️ Lūdzu, augšupielādējiet CV failu!"

    try:
        file_name = getattr(file, "name", None) or str(file)
        ext = file_name.split(".")[-1].lower()

        if ext == "pdf":
            text = extract_text_from_pdf(file)
        elif ext == "docx":
            text = extract_text_from_docx(file)
        elif ext == "txt":
            text = extract_text_from_txt(file)
        else:
            return "❌ Neatbalstīts faila formāts! Atbalstītie: PDF, DOCX, TXT"

        name = extract_name_with_ner(text)
        email = extract_email(text)
        phone = extract_phone(text)

        exp_score, experience = analyze_experience(text)
        edu_score, education = analyze_education(text)
        skill_score, skills = analyze_skills(text)
        lang_score, languages = analyze_languages(text)
        total = exp_score + edu_score + skill_score + lang_score

        # Generate the candidate summary.
        candidate_description = generate_candidate_description(
            name, experience, education, skills, languages
        )

        report_lines = [
            f"📊 REZULTĀTI: {total}/100",
            "────────────────────────────",
            f"👤 {name} | 📧 {email} | 📱 {phone}",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            f"📝 {candidate_description}",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━────────────────────────────",
            f"💼 Pieredze: {exp_score}/30 ({experience})",
            f"🎓 Izglītība: {edu_score}/30 ({education})",
            f"💻 Prasmes: {skill_score}/20 ({skills})",
            f"🌐 Valodas: {lang_score}/20 ({languages})",
            "",
        ]
        return "\n".join(report_lines)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        return f"❌ Kļūda apstrādājot failu: {str(e)}"


# --- Gradio interface ------------------------------------------------------

demo = gr.Interface(
    fn=analyze_cv,
    inputs=gr.File(label="Ielādējiet CV failu", file_types=['.pdf', '.docx', '.txt']),
    outputs=gr.Textbox(label="Analīzes rezultāti", lines=25),
    title="📄 CV Automatīskās Analīzes Sistēma",
    description="""Augšupielādējiet CV failu (PDF, DOCX vai TXT), un sistēma automatīski analizēs:
- 👤 Personīgo informāciju
- 💼 Darba pieredzi
- 🎓 Izglītību
- 🌐 Valodu prasmes
- 📚 Tehniskās prasmes

**Rezultāti tiek vērtēti 100 punktu skalā**
""",
)

if __name__ == "__main__":
    demo.launch()