Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,116 +1,95 @@
|
|
|
|
|
|
|
|
| 1 |
import requests
|
| 2 |
-
import
|
| 3 |
import streamlit as st
|
| 4 |
-
from
|
|
|
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
-
from keybert import KeyBERT
|
| 7 |
-
|
| 8 |
-
# Keyword extraction
|
| 9 |
-
def extract_keywords(text, num_keywords=10):
    """Extract up to *num_keywords* keywords from *text*, lower-cased.

    Uses KeyBERT with English stop-words removed; only the keyword
    strings are returned, not their scores.
    """
    model = KeyBERT()
    extracted = model.extract_keywords(text, top_n=num_keywords, stop_words='english')
    return [phrase.lower() for phrase, _score in extracted]
|
| 13 |
-
|
| 14 |
-
# Field identification
|
| 15 |
-
def identify_field(keywords):
    """Return the professional field whose vocabulary best matches *keywords*.

    Counts exact keyword/term matches per field; ties are broken in favour
    of the field listed first.
    """
    field_terms = {
        "Engineering": ["engineer", "mechanical", "electrical", "civil", "plc", "automation"],
        "Data Science": ["machine learning", "data", "python", "statistics", "ai"],
        "Software Development": ["developer", "software", "backend", "frontend", "javascript"],
        "Marketing": ["seo", "content", "marketing", "branding"],
        "Finance": ["accounting", "finance", "budget", "tax"],
        "Design": ["photoshop", "illustrator", "design", "creative"],
        "Healthcare": ["nursing", "surgery", "hospital", "patient"],
        "Construction": ["carpentry", "plumbing", "hvac", "gardening", "mining"]
    }
    keyword_set = set(keywords)
    best_field = None
    best_hits = -1
    for field, terms in field_terms.items():
        hits = sum(1 for term in terms if term in keyword_set)
        if hits > best_hits:
            best_field = field
            best_hits = hits
    return best_field
|
| 28 |
-
|
| 29 |
-
# Technical background
|
| 30 |
-
def is_technical_background(keywords):
    """Classify *keywords* as "Technical" or "Non-Technical".

    Compares how many keywords fall in each vocabulary; equal counts
    (including the empty case) are classified as "Technical".
    """
    technical = {"engineer", "machine learning", "python", "developer", "software", "automation", "plc", "ai"}
    non_technical = {"marketing", "finance", "content", "seo", "branding", "accounting", "creative"}
    found = set(keywords)
    if len(found & technical) >= len(found & non_technical):
        return "Technical"
    return "Non-Technical"
|
| 36 |
-
|
| 37 |
-
# CV skill score
|
| 38 |
-
def calculate_cv_score(text, keywords):
    """Score *text* from 0-100 by TF-IDF cosine similarity to *keywords*.

    The keywords are joined into a single "ideal" document; the rounded
    percentage similarity between the CV text and that document is returned.
    """
    reference = " ".join(keywords)
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform([text, reference])
    similarity = cosine_similarity(matrix[0:1], matrix[1:2])[0][0]
    return round(100 * similarity)
|
| 44 |
-
|
| 45 |
-
# Dynamic skill suggestions using Open Skills API
|
| 46 |
-
def suggest_upskilling(keywords):
    """Suggest up to 10 skills the user lacks, using the Open Skills API.

    On any failure a Streamlit error is shown and an empty list returned.
    """
    try:
        resp = requests.get("https://publicapis.dev/api/skills")
        resp.raise_for_status()
        known_skills = set(resp.json().get("skills", []))
        owned_skills = {kw.lower() for kw in keywords}
        return list(known_skills - owned_skills)[:10]
    except Exception as e:
        st.error(f"Error fetching skills: {e}")
        return []
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
try:
|
| 61 |
-
response = requests.get("https://free-certifications.com/api/certifications")
|
| 62 |
-
response.raise_for_status()
|
| 63 |
-
certifications = response.json().get("certifications", [])
|
| 64 |
-
relevant_certs = [cert for cert in certifications if any(kw in cert.lower() for kw in keywords)]
|
| 65 |
-
return relevant_certs[:10]
|
| 66 |
-
except Exception as e:
|
| 67 |
-
st.error(f"Error fetching certifications: {e}")
|
| 68 |
-
return []
|
| 69 |
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
try:
|
| 73 |
-
response = requests.get("https://docs.business.scholarshipowl.com/api/scholarships")
|
| 74 |
-
response.raise_for_status()
|
| 75 |
-
scholarships = response.json().get("scholarships", [])
|
| 76 |
-
relevant_scholarships = [sch for sch in scholarships if any(kw in sch.lower() for kw in keywords)]
|
| 77 |
-
return relevant_scholarships[:10]
|
| 78 |
-
except Exception as e:
|
| 79 |
-
st.error(f"Error fetching scholarships: {e}")
|
| 80 |
-
return []
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
except Exception as e:
|
| 91 |
-
st.error(f"Error fetching education opportunities: {e}")
|
| 92 |
-
return []
|
| 93 |
|
| 94 |
-
|
| 95 |
-
def suggest_visa_opportunities(keywords):
|
| 96 |
try:
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
visa_info = response.json().get("visa_requirements", [])
|
| 100 |
-
relevant_visas = [visa for visa in visa_info if any(kw in visa.lower() for kw in keywords)]
|
| 101 |
-
return relevant_visas[:10]
|
| 102 |
except Exception as e:
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
# Job listings using TheirStack Job Postings API
|
| 107 |
-
def get_job_listings(keywords, location="Pakistan", results_per_page=10):
|
| 108 |
try:
|
| 109 |
-
|
| 110 |
-
response = requests.get(f"https://theirstack.com/api/jobs?query={query}&location={location}&limit={results_per_page}")
|
| 111 |
response.raise_for_status()
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import json
|
| 3 |
import requests
|
| 4 |
+
import spacy
|
| 5 |
import streamlit as st
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
# Load spaCy model
# NOTE(review): runs at import time; requires `python -m spacy download en_core_web_sm`
# to have been run beforehand, otherwise this raises OSError.
nlp_spacy = spacy.load("en_core_web_sm")

# Load LLM model (e.g., Mistral, OpenHermes, Phi-2) using Transformers pipeline
# NOTE(review): device=-1 forces CPU; loading a 7B model here downloads many GB
# and can take minutes/exhaust RAM at import — confirm this is intended.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", device=-1)

# Adzuna credentials (replace with your actual app_id and app_key)
# These are placeholders; API calls in get_job_recommendations will fail until set.
ADZUNA_APP_ID = "your_adzuna_app_id"
ADZUNA_APP_KEY = "your_adzuna_app_key"
+
def extract_keywords(cv_text, top_n=10):
    """Extract up to *top_n* short keyword phrases from a CV.

    Keeps noun chunks of fewer than four words, lower-cased, de-duplicated
    while preserving first-occurrence order.

    Args:
        cv_text: Raw CV text.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of at most ``top_n`` unique keyword strings.
    """
    doc = nlp_spacy(cv_text.lower())
    chunks = (chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) < 4)
    # dict.fromkeys de-duplicates deterministically; the previous
    # list(set(...)) produced a different keyword order on every run
    # (hash randomization), making scores and queries non-reproducible.
    return list(dict.fromkeys(chunks))[:top_n]
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
def generate_with_llm(prompt, max_tokens=200):
    """Generate text with the module-level ``llm`` pipeline.

    Args:
        prompt: Instruction/prompt string (may span multiple lines).
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        A list of generated lines, or a one-element list containing an
        error message if generation fails.
    """
    try:
        result = llm(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
        generated = result[0]['generated_text']
        # The pipeline echoes the prompt at the start of generated_text.
        # The old split("\n")[1:] only dropped the FIRST line, so for the
        # multi-line prompts used in this module most of the prompt (the
        # whole CV) was returned as "generated" output. Strip the full
        # prompt prefix instead.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.lstrip("\n").splitlines()
    except Exception as e:
        return [f"Error generating with LLM: {e}"]
|
| 31 |
+
|
| 32 |
+
def get_skills_suggestions(cv_text):
    """Ask the LLM for the top 5 skills that would improve the user's job prospects."""
    return generate_with_llm(
        f"Based on the following CV:\n{cv_text}\nSuggest the top 5 relevant technical or professional skills the user should learn to improve job opportunities."
    )
|
| 35 |
+
|
| 36 |
+
def get_certification_recommendations(cv_text):
    """Ask the LLM for 3 globally recognized certifications that strengthen this CV."""
    query = f"Given the CV below, suggest 3 globally recognized certifications to improve their chances of getting a better job or promotion:\n\n{cv_text}"
    return generate_with_llm(query)
|
| 39 |
+
|
| 40 |
+
def get_scholarship_suggestions(cv_text):
    """Ask the LLM for 3 scholarship opportunities or types matching this CV."""
    return generate_with_llm(
        f"The following CV belongs to a person looking for international scholarships. Suggest 3 relevant scholarship opportunities or types of scholarships (e.g., STEM, MBA, Engineering) they could pursue:\n\n{cv_text}"
    )
|
| 43 |
+
|
| 44 |
+
def get_education_opportunities(cv_text):
    """Ask the LLM for 3 higher-education programs that complement the CV."""
    llm_prompt = f"Based on the CV content below, suggest 3 higher education programs or degree types (e.g., MBA, MSc in AI, MEng Industrial Design) that would complement the user's experience:\n\n{cv_text}"
    return generate_with_llm(llm_prompt)
|
| 47 |
+
|
| 48 |
+
def get_visa_pathways(cv_text, country="USA"):
    """Ask the LLM for the top 3 visa types or pathways to *country* for this profile."""
    return generate_with_llm(
        f"Based on this CV and assuming the user wants to move to {country}, suggest the top 3 most relevant visa types or pathways for this person's profession and experience:\n\n{cv_text}"
    )
|
| 51 |
+
|
| 52 |
+
def get_job_recommendations(cv_text, location="USA", results=10):
    """Fetch job listings from the Adzuna search API that match the CV.

    Args:
        cv_text: Raw CV text; keywords are extracted from it for the query.
        location: Free-text location for Adzuna's "where" filter.
        results: Maximum number of listings to return.

    Returns:
        A list of job dicts (title/company/location/description/url); on
        any failure, a one-element list describing the error.
    """
    keywords = extract_keywords(cv_text)
    query = "+".join(keywords)

    # NOTE(review): the endpoint pins the Adzuna country code to "us" even
    # though `location` is a parameter — confirm whether the country segment
    # should be derived from `location`. (Dropped the pointless f-string the
    # original used on this placeholder-free URL.)
    url = "https://api.adzuna.com/v1/api/jobs/us/search/1"
    params = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_APP_KEY,
        "what": query,
        "where": location,
        "results_per_page": results,
        "content-type": "application/json"
    }

    try:
        # Timeout keeps the app responsive if Adzuna hangs (previously absent,
        # so a stalled connection would block indefinitely).
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        job_data = response.json()
        return [
            {
                "title": job.get("title"),
                "company": job.get("company", {}).get("display_name"),
                "location": job.get("location", {}).get("display_name"),
                "description": job.get("description"),
                "url": job.get("redirect_url")
            }
            for job in job_data.get("results", [])
        ]
    except Exception as e:
        return [{"title": "Error fetching job listings", "description": str(e)}]
|
| 80 |
+
|
| 81 |
+
def get_cv_score(cv_text):
    """Score a CV from 0-100: ten points per extracted keyword, capped at 100."""
    extracted = extract_keywords(cv_text)
    points = 10 * len(extracted)
    return points if points < 100 else 100
|
| 86 |
+
|
| 87 |
+
def get_field_classification(cv_text):
    """Return the single most relevant professional field for the CV.

    Args:
        cv_text: Raw CV text.

    Returns:
        The first line of the LLM's answer, or an empty string when the
        LLM produced no output (the original indexed ``[0]`` unconditionally
        and raised IndexError on an empty result).
    """
    prompt = f"Based on this CV, determine the most relevant professional domain or field (e.g., Data Science, Civil Engineering, Marketing, Education):\n\n{cv_text}"
    lines = generate_with_llm(prompt, max_tokens=50)
    return lines[0] if lines else ""
|
| 90 |
+
|
| 91 |
+
def get_personalized_advice(cv_text):
    """Return a counselor-style paragraph of career guidance for this CV."""
    advice_lines = generate_with_llm(
        f"This is a user's CV. Give a friendly, professional career counselor-style paragraph with guidance and encouragement for their next steps:\n\n{cv_text}",
        max_tokens=150,
    )
    return "\n".join(advice_lines)
|
| 95 |
+
|