Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import PyPDF2 | |
| import docx | |
| import re | |
| from datetime import datetime | |
| from transformers import pipeline | |
# Load the NER model once at import time; extract_name_with_ner() reuses
# this module-level pipeline for every call.
print("🤖 Ielādē XLM-RoBERTa NER modeli...")
ner_model = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
print("✅ NER modelis gatavs!")
# Text extraction
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF, concatenated without separators.

    Args:
        file_obj: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        A single string made of each page's ``extract_text()`` result, in
        page order.
    """
    reader = PyPDF2.PdfReader(file_obj)
    page_texts = []
    for page in reader.pages:
        page_texts.append(page.extract_text())
    return ''.join(page_texts)
def extract_text_from_docx(file_obj):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file_obj: Whatever ``docx.Document`` accepts as its source.

    Returns:
        The ``text`` of each entry in ``doc.paragraphs`` joined with newlines.
    """
    document = docx.Document(file_obj)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return '\n'.join(paragraph_texts)
def extract_text_from_txt(file_obj):
    """Return the content of a plain-text file as ``str``.

    Args:
        file_obj: A readable file-like object, opened in binary or text mode.

    Returns:
        The decoded text. Bytes are decoded as UTF-8 and a leading byte-order
        mark is dropped, so the first line starts with the first real
        character (the name heuristics anchor on the start of a line).

    Raises:
        UnicodeDecodeError: If binary content is not valid UTF-8.
    """
    content = file_obj.read()
    if isinstance(content, str):
        # Text-mode handle: already decoded, only a stray BOM may remain.
        return content.lstrip('\ufeff')
    # "utf-8-sig" decodes plain UTF-8 as well and strips the BOM if present.
    return content.decode('utf-8-sig')
# Name extraction: NER first, regex heuristic as a fallback
_CV_HEADING_RE = re.compile(r'curriculum vitae|cv|resume|životopiss', re.IGNORECASE)
_NAME_LINE_RE = re.compile(
    r'^[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+\s+[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+'
)


def extract_name_with_ner(text):
    """Return the candidate's name, or ``"Nav atrasts"`` when none is found.

    The first 500 characters are passed to the module-level ``ner_model``;
    the first ``PER`` entity with a score above 0.7 and at least two words
    wins. If the model finds nothing, or fails, the first five lines are
    scanned for one that starts with two capitalised words, skipping lines
    that look like a CV heading.

    Args:
        text: Full CV text.

    Returns:
        The detected name, the whole matching line (regex fallback), or
        ``"Nav atrasts"``.
    """
    if not text:
        return "Nav atrasts"

    try:
        entities = ner_model(text[:500])
    except Exception:
        # Best effort: a model failure must not prevent the regex fallback.
        entities = []

    for entity in entities:
        if entity.get('entity_group') == 'PER' and entity.get('score', 0) > 0.7:
            name = str(entity.get('word', '')).strip()
            if len(name.split()) >= 2:
                return name

    # Fallback: the name is usually one of the first lines of a CV.
    for line in text.split('\n')[:5]:
        line = line.strip()
        if _CV_HEADING_RE.search(line):
            continue
        if _NAME_LINE_RE.match(line):
            return line
    return "Nav atrasts"
def extract_email(text):
    """Return the first e-mail address found in ``text``.

    Args:
        text: Text to search.

    Returns:
        The matched address, or ``"Nav atrasts"`` when there is none.
    """
    # The top-level-domain class is letters only; a "|" inside a character
    # class is a literal pipe, not alternation, and would be swallowed.
    match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    return match.group(0) if match else "Nav atrasts"
| def extract_phone(text): | |
| match = re.search(r'\+?\d[\d\s-]{7,}\d', text) | |
| return match.group(0).strip() if match else "Nav atrasts" | |
# Analysis functions
_PRESENT_WORDS = ('tagad', 'present', 'šobrīd')

# Format 1: "MM/YYYY to MM/YYYY", "MM.YYYY - MM.YYYY", "MM/YYYY - present".
_MONTH_RANGE_RE = re.compile(
    r'(\d{1,2}[/.]\d{4})\s*(?:to\s+|-|–|—)\s*(\d{1,2}[/.]\d{4}|tagad|present|šobrīd)',
    re.IGNORECASE,
)
# Format 2: "YYYY-YYYY", "YYYY - YYYY", "YYYY - present".
_YEAR_RANGE_RE = re.compile(
    r'(\d{4})\s*[-–—]\s*(\d{4}|tagad|present|šobrīd)',
    re.IGNORECASE,
)
# (minimum total years, score) pairs, highest threshold first.
_EXPERIENCE_SCORES = ((6, 30), (4, 20), (3, 15), (2, 10), (1, 5))


def _years_between(start, end, current_year):
    """Return the whole years spanned by one date range, 0 if implausible."""
    start_year = int(re.split(r'[/.]', start)[-1])
    if end.lower() in _PRESENT_WORDS:
        end_year = current_year
    else:
        end_year = int(re.split(r'[/.]', end)[-1])
    # A range that ends in the future only counts up to the current year.
    end_year = min(end_year, current_year)
    # Drops reversed ranges and digit runs that are not years at all
    # (e.g. the phone fragment "2912-3456").
    if start_year < 1900 or start_year > end_year:
        return 0
    return end_year - start_year


def analyze_experience(text):
    """Score work experience from the date ranges found in ``text``.

    Both month-based (``MM/YYYY - MM/YYYY``) and year-based
    (``YYYY - YYYY``) ranges are counted; the end may be one of the
    "present" words. Every range in the text contributes, and overlapping
    ranges are not merged.

    Args:
        text: Full CV text.

    Returns:
        A ``(score, label)`` tuple: ``score`` is 0-30 and ``label`` is
        ``"<total years> gadi"``.
    """
    current_year = datetime.now().year

    month_ranges = _MONTH_RANGE_RE.findall(text)
    # Remove month-based ranges before the year scan; otherwise
    # "01/2015 - present" would also match as "2015 - present" and be
    # counted twice.
    remainder = _MONTH_RANGE_RE.sub(';', text)
    year_ranges = _YEAR_RANGE_RE.findall(remainder)

    total_years = sum(
        _years_between(start, end, current_year)
        for start, end in month_ranges + year_ranges
    )

    exp_score = next(
        (score for minimum, score in _EXPERIENCE_SCORES if total_years >= minimum),
        0,
    )
    return exp_score, f"{total_years} gadi"
def analyze_education(text):
    """Score the highest education level mentioned in ``text``.

    Levels are checked from highest to lowest; the first level with any
    keyword occurring in the lower-cased text wins.

    Args:
        text: Full CV text.

    Returns:
        A ``(score, label)`` tuple, or ``(0, "Nav norādīts")`` when no
        keyword is present.
    """
    haystack = text.lower()
    tiers = (
        (('phd', 'doktor', 'dr.'), 30, 'Doktorantūra'),
        (('maģistr', 'master'), 25, 'Maģistra grāds'),
        (('bakalaur', 'bachelor'), 20, 'Bakalaura grāds'),
        (('universitāte', 'university', 'college', 'augstskola'), 15, 'Augstākā izglītība'),
        (('vidusskola', 'high school', 'secondary'), 10, 'Vidējā izglītība'),
    )
    for keywords, points, label in tiers:
        for keyword in keywords:
            if keyword in haystack:
                return points, label
    return 0, "Nav norādīts"
# A "letter" is a word character that is neither a digit nor an underscore.
_NO_LETTER_BEFORE = r'(?<![^\W\d_])'
_NO_LETTER_AFTER = r'(?![^\W\d_])'


def _mentions_skill(skill, text_lower):
    """Return True if ``skill`` occurs in ``text_lower`` as a standalone term."""
    # Words of a multi-word skill may be separated by any whitespace,
    # e.g. a line break in text extracted from a PDF.
    body = r'\s+'.join(re.escape(part) for part in skill.split())
    pattern = _NO_LETTER_BEFORE + body + _NO_LETTER_AFTER
    return re.search(pattern, text_lower) is not None


def analyze_skills(text):
    """Score the technical skills mentioned in ``text``.

    A skill counts only when it is not embedded in a longer word, so
    "javascript" does not also count as "java" and "excellent" does not
    count as "excel". Digits and punctuation may follow directly, so
    "python3" and "c++17" still count.

    Args:
        text: Full CV text.

    Returns:
        A ``(score, skills)`` tuple: 3 points per skill capped at 20, and
        the found skills joined by ``", "`` (``'Nav atrasts'`` if none).
    """
    technical_skills = ['python', 'java', 'javascript', 'c++', 'sql', 'machine learning',
                        'data analysis', 'excel', 'powerpoint', 'word', 'project management']
    text_lower = text.lower()  # lower-case once, not once per skill
    found_skills = [skill for skill in technical_skills
                    if _mentions_skill(skill, text_lower)]
    return min(len(found_skills) * 3, 20), ', '.join(found_skills) or 'Nav atrasts'
def analyze_languages(text):
    """Score the spoken languages mentioned in ``text``.

    A language counts when any of its stems occurs as a substring of the
    lower-cased text.

    Args:
        text: Full CV text.

    Returns:
        A ``(score, languages)`` tuple: 5 points per language capped at 20,
        and the found language names joined by ``", "``
        (``'Nav norādīts'`` if none).
    """
    stems_by_language = {
        'latviešu': ['latvie', 'latvian'],
        'angļu': ['angļ', 'english'],
        'krievu': ['kriev', 'russian'],
        'vācu': ['vāc', 'german', 'deutsch'],
        'franču': ['franč', 'french', 'français'],
        'spāņu': ['spāņ', 'spanish', 'español'],
    }
    haystack = text.lower()
    detected = []
    for language, stems in stems_by_language.items():
        for stem in stems:
            if stem in haystack:
                detected.append(language)
                break
    score = min(len(detected) * 5, 20)
    return score, ', '.join(detected) or 'Nav norādīts'
# Candidate description generation
def generate_candidate_description(name, experience, education, skills, languages):
    """Build a one-paragraph summary of the candidate from the extracted CV data.

    Args:
        name: Candidate name, or ``"Nav atrasts"`` when unknown.
        experience: Experience label; used only if it contains ``"gadi"``.
        education: Education label, or ``"Nav norādīts"``.
        skills: Skills joined by ``", "``, or ``"Nav atrasts"``.
        languages: Languages joined by ``", "``, or ``"Nav norādīts"``.

    Returns:
        The summary sentence(s), always ending with a full stop.
    """
    # Open with the name, or a generic noun when no name was found.
    fragments = ["Kandidāts" if name == "Nav atrasts" else name]

    # Education
    if education and education != "Nav norādīts":
        fragments.append(f" ar {education.lower()}")

    # Experience
    if experience and "gadi" in experience:
        fragments.append(f", kam ir {experience} darba pieredze")

    # Skills: name at most three, then refer to the rest collectively.
    if skills and skills != "Nav atrasts":
        skill_items = skills.split(", ")
        if len(skill_items) <= 3:
            fragments.append(f". Pārzina {skills}")
        else:
            leading = ", ".join(skill_items[:3])
            fragments.append(f". Pārzina {leading} un citas tehnoloģijas")

    # Languages
    if languages and languages != "Nav norādīts":
        fragments.append(f". Runā {languages} valodās")

    fragments.append(".")
    return "".join(fragments)
# Main CV analysis entry point
def analyze_cv(file):
    """Analyse an uploaded CV and return a formatted, scored report.

    Args:
        file: The upload handed over by the Gradio ``File`` input: either an
            object whose ``name`` attribute is the path of the uploaded
            file, or the path itself as a string. ``None`` means nothing
            was uploaded.

    Returns:
        The report text, or a user-facing warning/error message when no
        file was given, the extension is unsupported, or processing failed.
    """
    if file is None:
        return "⚠️ Lūdzu, augšupielādējiet CV failu!"
    try:
        # Resolve a filesystem path whichever form the upload takes.
        # NOTE(review): what Gradio passes depends on its version and the
        # component's `type` setting — confirm against the deployed version.
        file_path = str(getattr(file, 'name', file))
        ext = file_path.split('.')[-1].lower()
        if ext not in ('pdf', 'docx', 'txt'):
            return "❌ Neatbalstīts faila formāts! Atbalstītie: PDF, DOCX, TXT"

        # Open the file ourselves so every extractor receives a real binary
        # handle; a path string has no .read() and the handle is closed
        # deterministically.
        with open(file_path, 'rb') as handle:
            if ext == 'pdf':
                text = extract_text_from_pdf(handle)
            elif ext == 'docx':
                text = extract_text_from_docx(handle)
            else:
                text = extract_text_from_txt(handle)

        name = extract_name_with_ner(text)
        email = extract_email(text)
        phone = extract_phone(text)
        exp_score, experience = analyze_experience(text)
        edu_score, education = analyze_education(text)
        skill_score, skills = analyze_skills(text)
        lang_score, languages = analyze_languages(text)
        total = exp_score + edu_score + skill_score + lang_score

        # Short prose summary of the candidate.
        candidate_description = generate_candidate_description(
            name, experience, education, skills, languages
        )

        report_lines = [
            f"📊 REZULTĀTI: {total}/100",
            "────────────────────────────",
            f"👤 {name} | 📧 {email} | 📱 {phone}",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            f"📝 {candidate_description}",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━────────────────────────────",
            f"💼 Pieredze: {exp_score}/30 ({experience})",
            f"🎓 Izglītība: {edu_score}/30 ({education})",
            f"💻 Prasmes: {skill_score}/20 ({skills})",
            f"🌐 Valodas: {lang_score}/20 ({languages})",
        ]
        return "\n".join(report_lines) + "\n"
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Kļūda apstrādājot failu: {str(e)}"
# Gradio interface
_cv_upload = gr.File(
    label="Ielādējiet CV failu",
    file_types=['.pdf', '.docx', '.txt'],
)
_report_box = gr.Textbox(label="Analīzes rezultāti", lines=25)
_app_description = """Augšupielādējiet CV failu (PDF, DOCX vai TXT), un sistēma automatīski analizēs:
- 👤 Personīgo informāciju
- 💼 Darba pieredzi
- 🎓 Izglītību
- 🌐 Valodu prasmes
- 📚 Tehniskās prasmes
**Rezultāti tiek vērtēti 100 punktu skalā**
"""

# Wire analyze_cv to a single file input and a single text output.
demo = gr.Interface(
    fn=analyze_cv,
    inputs=_cv_upload,
    outputs=_report_box,
    title="📄 CV Automatīskās Analīzes Sistēma",
    description=_app_description,
)

# Start the web UI only when run as a script.
if __name__ == "__main__":
    demo.launch()