# CV analysis demo (Gradio application).
# The page-scrape residue that preceded the code (file-size banner, commit-hash
# gutter and line-number column) is not part of the module.
import gradio as gr
import PyPDF2
import docx
import re
from datetime import datetime
from transformers import pipeline

# Load the NER model (this runs once, at module import time).
print("🤖 Ielādē XLM-RoBERTa NER modeli...")
ner_model = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
print("✅ NER modelis gatavs!")

# Text extraction
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF, pages separated by newlines.

    Args:
        file_obj: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        The page texts joined with ``'\\n'``; a page that yields no text
        contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file_obj)
    # Join with a newline so the last token of one page and the first token of
    # the next do not fuse (which would corrupt the e-mail, phone and date
    # matching done later). `or ''` guards against a page yielding None.
    return '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)

def extract_text_from_docx(file_obj):
    """Return the text of all paragraphs of a DOCX document, one per line."""
    document = docx.Document(file_obj)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return '\n'.join(paragraph_texts)

def extract_text_from_txt(file_obj):
    """Read a plain-text CV from a stream and return it as ``str``.

    Args:
        file_obj: A readable stream, opened in binary or text mode.

    Returns:
        The decoded text. A leading UTF-8 byte-order mark is dropped, and
        bytes that are not valid UTF-8 become U+FFFD instead of aborting the
        whole analysis.
    """
    data = file_obj.read()
    if isinstance(data, str):
        # Text-mode stream: already decoded.
        return data
    try:
        # 'utf-8-sig' is plain UTF-8 that also strips a BOM if one is present.
        return data.decode('utf-8-sig')
    except UnicodeDecodeError:
        # Best effort: keep everything that is readable.
        return data.decode('utf-8', errors='replace')

# Improved name extraction with NER
def extract_name_with_ner(text):
    """Find the candidate's full name in CV text.

    The NER model is tried first on the first 500 characters. If it finds no
    confident multi-word person entity, or fails altogether, the first five
    lines are scanned for a "Firstname Lastname" pattern instead.

    Args:
        text: Plain text of the CV.

    Returns:
        The detected name, or "Nav atrasts" when nothing matches.
    """
    if not text:
        return "Nav atrasts"

    try:
        for entity in ner_model(text[:500]):
            if entity['entity_group'] == 'PER' and entity['score'] > 0.7:
                name = entity['word'].strip()
                # Require at least two tokens so a lone first name is not
                # reported as the full name.
                if len(name.split()) >= 2:
                    return name
    except Exception:
        # Deliberate best effort: a model failure (or an unexpected result
        # shape) must not disable the regex fallback below.
        pass

    # Fallback regex
    for line in text.split('\n')[:5]:
        line = line.strip()
        # Skip document headings; \b keeps "cv" from matching inside a word.
        if re.search(r'curriculum vitae|\bcv\b|resume|životopiss', line, re.IGNORECASE):
            continue
        if re.match(r'^[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+\s+[A-ZĀČĒĢĪĶĻŅŠŪŽ][a-zāčēģīķļņšūž]+', line):
            return line
    return "Nav atrasts"

def extract_email(text):
    """Return the first e-mail address found in *text*, or "Nav atrasts"."""
    # The top-level-domain class is [A-Za-z]: inside a character class '|' is
    # a literal pipe, not alternation, and would let a match swallow "|..."
    # text that follows the address.
    match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    return match.group(0) if match else "Nav atrasts"

def extract_phone(text):
    """Return the first phone-like number found in *text*, or "Nav atrasts".

    A candidate is an optional '+', then digits that may be separated by
    hyphens or horizontal whitespace, at least 8 characters long in total.
    Candidates are rejected when they

    * contain fewer than 7 digits (e.g. fragments of "01.2015 - 12.2020"), or
    * are a plain year range such as "2015 - 2020".

    Args:
        text: Plain text of the CV.

    Returns:
        The matched number exactly as written, or "Nav atrasts".
    """
    # [^\S\r\n] means "whitespace except line breaks": a number must not run
    # across lines (otherwise "1990\n29123456" becomes one match).
    candidate_re = r'\+?\d(?:[\d-]|[^\S\r\n]){6,}\d'
    year_range_re = r'(?:19|20)\d{2}\s*-\s*(?:19|20)\d{2}'

    for match in re.finditer(candidate_re, text):
        candidate = match.group(0)
        if sum(ch.isdigit() for ch in candidate) < 7:
            continue
        if re.fullmatch(year_range_re, candidate):
            continue
        return candidate
    return "Nav atrasts"

# Analysis functions
def analyze_experience(text):
    """Estimate total work experience from date ranges in *text* and score it.

    Two range formats are recognised (case-insensitively); the end of a range
    may also be one of the words "tagad", "present" or "šobrīd":

    * ``MM/YYYY - MM/YYYY`` / ``MM.YYYY to MM.YYYY``
    * ``YYYY - YYYY``

    Only the year parts are used. Each range contributes ``end - start``
    years. Ranges that start before 1900 or end before they start contribute
    nothing, and an end year in the future is capped at the current year.

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(score, label)`` tuple: ``score`` is 0-30 points and ``label`` is
        the total formatted as ``"<years> gadi"``.
    """
    present_words = ('tagad', 'present', 'šobrīd')
    current_year = datetime.now().year

    # Format 1: MM/YYYY to MM/YYYY or MM/YYYY - MM/YYYY
    month_year_range = re.compile(
        r'(\d{1,2}[/.]\d{4})\s*(?:to\s+|-|–|—)\s*(\d{1,2}[/.]\d{4}|tagad|present|šobrīd)',
        re.IGNORECASE,
    )
    # Format 2: YYYY-YYYY or YYYY - YYYY
    year_range = re.compile(
        r'(\d{4})\s*[-–—]\s*(\d{4}|tagad|present|šobrīd)',
        re.IGNORECASE,
    )

    def to_year(token):
        """Turn 'MM/YYYY', 'MM.YYYY', 'YYYY' or a 'present' word into a year."""
        if token.lower() in present_words:
            return current_year
        return int(re.split(r'[/.]', token)[-1])

    def span_years(start, end):
        """Return the whole years covered by one range, 0 if implausible."""
        start_year = to_year(start)
        end_year = min(to_year(end), current_year)
        if start_year < 1900 or end_year < start_year:
            return 0
        return end_year - start_year

    total_years = sum(
        span_years(start, end) for start, end in month_year_range.findall(text)
    )
    # Blank out the ranges already counted: "01/2015 - present" also contains
    # the year-only range "2015 - present" and must not be counted twice.
    remainder = month_year_range.sub(' | ', text)
    total_years += sum(
        span_years(start, end) for start, end in year_range.findall(remainder)
    )

    # Score by experience
    if total_years >= 6:
        exp_score = 30
    elif total_years >= 4:
        exp_score = 20
    elif total_years >= 3:
        exp_score = 15
    elif total_years >= 2:
        exp_score = 10
    elif total_years >= 1:
        exp_score = 5
    else:
        exp_score = 0

    return exp_score, f"{total_years} gadi"

def analyze_education(text):
    """Score the highest education level mentioned in *text*.

    Levels are checked from highest to lowest and the first one with a
    keyword hit wins. Keywords are matched as substrings of the lower-cased
    text, except ``dr.``, which must not be preceded by a word character so
    that abbreviations such as "adr." are not read as a doctoral title.

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(score, level)`` tuple, or ``(0, "Nav norādīts")`` when no
        keyword is found.
    """
    text_lower = text.lower()
    # Each keyword is a regular expression; all but 'dr.' are plain literals.
    education_levels = [
        ((r'phd', r'doktor', r'(?<!\w)dr\.'), 30, 'Doktorantūra'),
        ((r'maģistr', r'master'), 25, 'Maģistra grāds'),
        ((r'bakalaur', r'bachelor'), 20, 'Bakalaura grāds'),
        ((r'universitāte', r'university', r'college', r'augstskola'), 15, 'Augstākā izglītība'),
        ((r'vidusskola', r'high school', r'secondary'), 10, 'Vidējā izglītība'),
    ]

    for patterns, score, level in education_levels:
        if any(re.search(pattern, text_lower) for pattern in patterns):
            return score, level
    return 0, "Nav norādīts"

def analyze_skills(text):
    """Detect known technical skills in *text* and score them.

    Skills are matched case-insensitively as whole terms: a hit must not be
    directly preceded or followed by a letter, so "javascript" does not count
    as "java", "excellent" as "excel", or "password" as "word". Digits and
    punctuation may follow ("python3", "c++"). "sql" is additionally accepted
    as the tail of a product name ("MySQL", "PostgreSQL").

    Args:
        text: Plain text of the CV.

    Returns:
        A ``(score, skills)`` tuple: 3 points per skill, capped at 20, and
        the found skills joined with ", " (or 'Nav atrasts' when none).
    """
    technical_skills = ['python', 'java', 'javascript', 'c++', 'sql', 'machine learning',
                        'data analysis', 'excel', 'powerpoint', 'word', 'project management']
    # Skills that may appear glued to the end of a longer name.
    suffix_skills = {'sql'}
    letter = r'[^\W\d_]'  # any Unicode letter
    lowered = text.lower()

    found_skills = []
    for skill in technical_skills:
        head = '' if skill in suffix_skills else f'(?<!{letter})'
        if re.search(f'{head}{re.escape(skill)}(?!{letter})', lowered):
            found_skills.append(skill)

    return min(len(found_skills) * 3, 20), ', '.join(found_skills) or 'Nav atrasts'

def analyze_languages(text):
    """Detect which known languages are mentioned in *text* and score them.

    Each language is recognised by any of its substrings (Latvian stem or
    foreign-language name) in the lower-cased text.

    Returns:
        A ``(score, languages)`` tuple: 5 points per language, capped at 20,
        and the Latvian language names joined with ", " (or 'Nav norādīts').
    """
    languages = {
        'latviešu': ['latvie', 'latvian'],
        'angļu': ['angļ', 'english'],
        'krievu': ['kriev', 'russian'],
        'vācu': ['vāc', 'german', 'deutsch'],
        'franču': ['franč', 'french', 'français'],
        'spāņu': ['spāņ', 'spanish', 'español']
    }

    detected = []
    for language_name, markers in languages.items():
        for marker in markers:
            if marker in text.lower():
                detected.append(language_name)
                break

    score = min(5 * len(detected), 20)
    label = ', '.join(detected)
    if not label:
        label = 'Nav norādīts'
    return score, label

# Generates a description of the candidate
def generate_candidate_description(name, experience, education, skills, languages):
    """Compose a short one-paragraph summary of the candidate from CV data.

    Each argument is the label produced by the corresponding extractor; the
    placeholder labels ("Nav atrasts" / "Nav norādīts") and empty values are
    left out of the sentence.
    """
    # The sentence is assembled from fragments and glued together at the end.
    fragments = ["Kandidāts" if name == "Nav atrasts" else name]

    # Education
    if education and education != "Nav norādīts":
        fragments.append(f" ar {education.lower()}")

    # Experience
    if experience and "gadi" in experience:
        fragments.append(f", kam ir {experience} darba pieredze")

    # Skills: at most the first three are listed by name.
    if skills and skills != "Nav atrasts":
        listed = skills.split(", ")
        if len(listed) <= 3:
            fragments.append(f". Pārzina {skills}")
        else:
            headline = ", ".join(listed[:3])
            fragments.append(f". Pārzina {headline} un citas tehnoloģijas")

    # Languages
    if languages and languages != "Nav norādīts":
        fragments.append(f". Runā {languages} valodās")

    fragments.append(".")
    return "".join(fragments)

# Main CV analysis function
def analyze_cv(file):
    """Analyse an uploaded CV and return a formatted, scored report.

    Args:
        file: The upload handed over by the Gradio ``File`` input: either a
            path string or an object whose ``name`` attribute is the path of
            the uploaded file. ``None`` means nothing was uploaded.

    Returns:
        The report text, or a user-facing warning/error message. Exceptions
        raised while processing are reported in the returned message rather
        than propagated.
    """
    if file is None:
        return "⚠️ Lūdzu, augšupielādējiet CV failu!"

    try:
        # Resolve the path instead of reading from `file` directly: depending
        # on the Gradio version the value is a plain path string (no .read())
        # or a temp-file wrapper.
        file_path = file if isinstance(file, str) else file.name
        ext = file_path.rsplit('.', 1)[-1].lower()

        if ext == 'pdf':
            extractor = extract_text_from_pdf
        elif ext == 'docx':
            extractor = extract_text_from_docx
        elif ext == 'txt':
            extractor = extract_text_from_txt
        else:
            return "❌ Neatbalstīts faila formāts! Atbalstītie: PDF, DOCX, TXT"

        # Every extractor gets a fresh binary handle, closed when done.
        with open(file_path, 'rb') as handle:
            text = extractor(handle)

        name = extract_name_with_ner(text)
        email = extract_email(text)
        phone = extract_phone(text)

        exp_score, experience = analyze_experience(text)
        edu_score, education = analyze_education(text)
        skill_score, skills = analyze_skills(text)
        lang_score, languages = analyze_languages(text)

        total = exp_score + edu_score + skill_score + lang_score

        # Generate the description of the candidate
        candidate_description = generate_candidate_description(name, experience, education, skills, languages)

        return f"""📊 REZULTĀTI: {total}/100
────────────────────────────
👤 {name} | 📧 {email} | 📱 {phone}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━
📝 {candidate_description}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━────────────────────────────
💼 Pieredze: {exp_score}/30 ({experience})
🎓 Izglītība: {edu_score}/30 ({education})
💻 Prasmes: {skill_score}/20 ({skills})
🌐 Valodas: {lang_score}/20 ({languages})
"""

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Kļūda apstrādājot failu: {str(e)}"

# Gradio interface: a single file upload wired to analyze_cv, with the report
# shown in a text box.
demo = gr.Interface(
    fn=analyze_cv,
    inputs=gr.File(label="Ielādējiet CV failu", file_types=['.pdf', '.docx', '.txt']),
    outputs=gr.Textbox(label="Analīzes rezultāti", lines=25),
    title="📄 CV Automatīskās Analīzes Sistēma",
    description="""Augšupielādējiet CV failu (PDF, DOCX vai TXT), un sistēma automatīski analizēs:
    
- 👤 Personīgo informāciju
- 💼 Darba pieredzi
- 🎓 Izglītību
- 🌐 Valodu prasmes
- 📚 Tehniskās prasmes

**Rezultāti tiek vērtēti 100 punktu skalā**
"""
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()