Spaces:
Sleeping
Sleeping
File size: 10,857 Bytes
bc45d96 54b1a86 bc45d96 54b1a86 bc45d96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
import json
import re
from pathlib import Path
from difflib import get_close_matches
from collections import OrderedDict
import streamlit as st
import spacy # spaCy for intent detection
from sentence_transformers import SentenceTransformer, util
import torch
# --- Paths ---
# Both paths are relative, so they resolve against the process's working directory.
# ROADMAPS_PATH: roadmap catalogue; the app stops at startup when it is missing.
ROADMAPS_PATH = Path("roadmaps_fixed.json")
# SYNONYMS_PATH: alias table; generated from the roadmap keys and saved when absent.
SYNONYMS_PATH = Path("synonyms.json")
# --- Helpers ---
def load_json(path: Path):
    """Read and decode the UTF-8 JSON document stored at *path*.

    Args:
        path: Location of the JSON file (a ``Path`` or a plain string).

    Returns:
        The decoded JSON value, or ``None`` when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Open first and handle the failure instead of checking exists() up front:
    # the file can disappear between the check and the open.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a path whose parent component is a file,
        # which the previous exists() check also reported as "missing".
        return None
def save_json(obj, path: Path):
    """Serialise *obj* as pretty-printed UTF-8 JSON and write it to *path*.

    Args:
        obj: Any JSON-serialisable value.
        path: Destination file; it is created or overwritten.

    Raises:
        TypeError: If *obj* contains a value the ``json`` module cannot encode.
            The destination file is left untouched in that case.
    """
    # Serialise before opening: opening with "w" truncates the file, so
    # encoding inside the ``with`` block would wipe an existing file whenever
    # the object turns out not to be serialisable.
    payload = json.dumps(obj, ensure_ascii=False, indent=2)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
# --- Load Data ---
ROADMAPS = load_json(ROADMAPS_PATH)
if ROADMAPS is None:
    # Nothing can be rendered without the roadmap catalogue; end this run.
    st.error(f"Missing {ROADMAPS_PATH}. Put your roadmaps JSON in the app directory.")
    st.stop()
if not isinstance(ROADMAPS, dict) or not ROADMAPS:
    # The rest of the script calls ROADMAPS.keys() and indexes it by skill
    # name, so any other JSON root would only fail later with a traceback.
    st.error(f"{ROADMAPS_PATH} must contain a non-empty JSON object keyed by skill name.")
    st.stop()
# A missing synonyms file is not an error: load_json returns None for it and
# the table is generated from the roadmap keys further down.
SYNONYMS = load_json(SYNONYMS_PATH)
# --- Auto-generate synonyms if missing ---
# (substring of the lower-cased skill name, aliases registered when it occurs)
_KEYWORD_ALIASES = (
    ("javascript", ("js",)),
    ("typescript", ("ts",)),
    ("python", ("py",)),
    ("postgresql", ("postgres", "pgsql")),
    ("mysql", ("maria",)),
    ("artificial intelligence", ("ai",)),
    ("machine learning", ("ml",)),
    ("natural language processing", ("nlp",)),
    ("computer vision", ("cv",)),
)


def generate_synonyms_from_skills(skills):
    """Build a lower-case alias table for the given skill names.

    For every skill the table contains:
      * its lower-cased name,
      * the name with all digits removed (when that differs),
      * an acronym of 2-6 letters built from the initials of a multi-word name,
      * the short aliases from ``_KEYWORD_ALIASES`` whose keyword occurs in it.

    Args:
        skills: Iterable of skill names (strings). Blank names are ignored.

    Returns:
        dict mapping alias (lower-case) to the stripped, original-case skill name.
        When two skills derive the same alias the later skill wins, but a
        derived alias never replaces another skill's own exact name.
    """
    names = [skill.strip() for skill in skills]
    exact_names = {name.lower() for name in names if name}
    synonyms = {}

    def add_alias(alias, target):
        # Without this guard the result depends on input order: e.g. the
        # acronym "go" of "Google Optimize" could hijack the skill "Go".
        if alias in exact_names and alias != target.lower():
            return
        synonyms[alias] = target

    for norm in names:
        if not norm:
            continue
        low = norm.lower()
        synonyms[low] = norm

        no_nums = re.sub(r"\d+", "", low).strip()
        if no_nums and no_nums != low:
            add_alias(no_nums, norm)

        if " " in low:
            # Initials of the words that start with a letter ("python 3" -> "p").
            acronym = "".join(w[0] for w in low.split() if w and w[0].isalpha())
            if 1 < len(acronym) <= 6:
                add_alias(acronym, norm)

        for keyword, aliases in _KEYWORD_ALIASES:
            if keyword in low:
                for alias in aliases:
                    add_alias(alias, norm)
    return synonyms
# Canonical skill names, in the order the roadmap file lists them.
SKILLS = list(ROADMAPS.keys())

if SYNONYMS is None:
    # No synonyms file on disk: derive the alias table from the skill names
    # and persist it.
    SYNONYMS = generate_synonyms_from_skills(list(SKILLS))
    save_json(SYNONYMS, SYNONYMS_PATH)

# Case-insensitive lookup tables: lower-cased key -> stored value.
SKILLS_LOWER = {name.lower(): name for name in SKILLS}
SYN_LOWER = {alias.lower(): target for alias, target in SYNONYMS.items()}

# Short filler words that are never treated as skill tokens.
STOPWORDS = {
    "in", "on", "at", "it", "an", "to", "by",
    "of", "for", "and", "or", "the", "a",
}
# --- spaCy Intent Detection ---
@st.cache_resource(show_spinner=False)
def _load_spacy_pipeline():
    """Load the small English spaCy pipeline.

    Streamlit re-executes this script on every interaction; the cache keeps
    the loaded pipeline so it is not read from disk again on each rerun.
    The spinner is disabled so that no UI element is emitted before
    ``st.set_page_config`` runs.
    """
    return spacy.load("en_core_web_sm")


nlp = _load_spacy_pipeline()
def detect_intent_spacy(user_text: str) -> str:
    """Classify whether the user asks for one merged roadmap or separate ones.

    The lower-cased text is parsed with the module-level ``nlp`` pipeline and
    scanned in three passes; the first hit in the first pass that matches
    decides the result.

    Returns:
        ``"single"``, ``"separate"``, or ``"default"`` when the text is empty,
        blank, or contains no cue.
    """
    if not (user_text and user_text.strip()):
        return "default"

    parsed = nlp(user_text.lower())

    # Pass 1: literal cue words, in reading order.
    single_words = {"single", "one", "combined", "merge"}
    separate_words = {"separate", "different", "individual", "each"}
    for tok in parsed:
        word = tok.text
        if word in single_words:
            return "single"
        if word in separate_words:
            return "separate"

    # Pass 2: cue phrases appearing inside a noun chunk.
    for chunk in parsed.noun_chunks:
        phrase = chunk.text
        if any(cue in phrase for cue in ("single roadmap", "one roadmap")):
            return "single"
        if any(cue in phrase for cue in ("separate", "different")):
            return "separate"

    # Pass 3: lemmas, so inflected forms of the cue verbs are recognised.
    verdict_by_lemma = {
        "merge": "single",
        "combine": "single",
        "integrate": "single",
        "split": "separate",
        "divide": "separate",
    }
    for tok in parsed:
        verdict = verdict_by_lemma.get(tok.lemma_)
        if verdict is not None:
            return verdict

    return "default"
# --- Career Path Logic ---
def suggest_career_names(skills):
    """Gather the career titles attached to the given skills.

    Each skill name is lower-cased and looked up in ``ROADMAPS``; skills that
    are unknown or have no ``"careers"`` entry contribute nothing.

    Returns:
        list of career names in first-seen order, without duplicates.
    """
    # A dict doubles as an insertion-ordered set here.
    unique = {}
    for name in skills:
        key = name.lower()
        if key not in ROADMAPS:
            continue
        entry = ROADMAPS[key]
        if "careers" not in entry:
            continue
        for career in entry["careers"]:
            unique.setdefault(career, None)
    return list(unique)
# "ai" has to be matched as a whole word: a plain substring test also fires on
# unrelated domains such as "blockchain development".
_AI_WORD = re.compile(r"\bai\b")


def suggest_combined_career(skills, domains):
    """Suggest a hybrid career title for a combination of skill domains.

    Rules are evaluated top to bottom and the first match wins.

    Args:
        skills: Detected skill names. Currently unused; kept so existing
            callers keep working.
        domains: Domain names of the detected skills (compared
            case-insensitively).

    Returns:
        The hybrid career title, or ``None`` when no rule matches.
    """
    present = {d.lower() for d in domains}
    mentions_ai = any(_AI_WORD.search(d) for d in present)
    has_design = "ui/ux design" in present

    # --- UI/UX + Development Hybrids ---
    if has_design and "frontend" in present:
        return "Frontend Developer with Design Expertise"
    if has_design and "backend & web frameworks" in present:
        return "Full-Stack Developer with UX Focus"
    if has_design and "programming languages" in present:
        # The AI variant is the more specific rule, so it is decided first;
        # checked after the generic rule it could never be reached.
        return "AI-Driven Designer" if mentions_ai else "Design Technologist"

    # --- Development + AI Hybrids ---
    if "backend & web frameworks" in present and mentions_ai:
        return "AI-Enhanced Full-Stack Developer"
    if "frontend" in present and mentions_ai:
        return "AI-Powered Frontend Engineer"

    # --- Mobile + AI Hybrids ---
    if "mobile development" in present and mentions_ai:
        return "AI-Powered Mobile Developer"

    # --- Cloud/DevOps Hybrids ---
    if "cloud computing" in present and "devops" in present:
        return "Cloud DevOps Engineer"

    # --- Web3 ---
    if "blockchain development" in present and "web development" in present:
        return "Web3 Developer"

    # --- AI Research / AI Infrastructure ---
    # These two rules require a domain named exactly "ai".
    if "ai" in present and "data science" in present:
        return "AI Research Scientist"
    if "ai" in present and "cloud computing" in present:
        return "AI Infrastructure Engineer"

    return None
# --- Hybrid Roadmap Builder (shortened for brevity) ---
def build_hybrid_roadmap(skills, career_name):
    """Return the generic four-stage roadmap used for hybrid career paths.

    Neither *skills* nor *career_name* influences the result; the same stages
    and steps are returned for every call.

    Returns:
        OrderedDict mapping stage name ("Beginner" .. "Expert") to a new list
        of step descriptions.
    """
    stages = (
        ("Beginner", ("Learn basics of each selected skill", "Project: simple hybrid prototype")),
        ("Intermediate", ("Combine skills into projects", "Project: hybrid application")),
        ("Advanced", ("Master frameworks across domains", "Project: large-scale hybrid system")),
        ("Expert", ("Lead innovation in hybrid domain", "Mentor others in hybrid specialization")),
    )
    # list(...) hands every caller its own mutable step lists.
    return OrderedDict((stage, list(steps)) for stage, steps in stages)
# --- Embeddings Model ---
@st.cache_resource(show_spinner=False)
def _load_embed_model():
    """Load the sentence-embedding model.

    Cached because Streamlit re-executes the script on every interaction and
    loading the model each time is expensive. The spinner is disabled so no
    UI element is emitted before ``st.set_page_config`` runs.
    """
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


@st.cache_resource(show_spinner=False)
def _encode_skill_names(names):
    """Embed the skill names; *names* is a tuple so it can serve as cache key."""
    return _load_embed_model().encode(list(names), convert_to_tensor=True)


embed_model = _load_embed_model()
# Row i of skill_embeddings is the embedding of skills[i].
skills = list(ROADMAPS.keys())
skill_embeddings = _encode_skill_names(tuple(skills))
# --- Extraction Logic (merged) ---
def extract_skills(user_text: str, threshold: float = 0.6):
    """Detect known skills mentioned in free-form user text.

    Three strategies are applied in order and their results are merged:
      1. substring search for multi-word skill names,
      2. per-token lookup (synonym table, exact name, then fuzzy match),
      3. embedding similarity between the whole text and every skill name.

    Args:
        user_text: Raw text typed by the user.
        threshold: Minimum cosine similarity for the embedding match.

    Returns:
        list of lower-cased skill names in order of first detection, without
        duplicates; empty for ``None``, empty or blank input.
    """
    if not user_text or not user_text.strip():
        return []

    found = []

    def remember(name):
        if name not in found:
            found.append(name)

    text_lower = user_text.lower()

    # --- Keyword & Synonym Matching ---
    # Multi-word names cannot survive tokenisation, so look for them as
    # substrings of the whole text first.
    for skill in SKILLS:
        low = skill.lower()
        if " " in low and low in text_lower:
            remember(low)

    for tk in re.split(r"[,\n; ]+", text_lower):
        tk = tk.strip()
        if not tk or tk in STOPWORDS:
            continue
        if tk in SYN_LOWER:
            mapped = SYN_LOWER[tk].lower()
            if mapped in ROADMAPS:
                remember(mapped)
            continue
        if tk in SKILLS_LOWER:
            # Keys of SKILLS_LOWER are already the lower-cased skill names.
            remember(tk)
            continue
        if len(tk) >= 3:
            # Very short tokens are skipped for fuzzy matching.
            match = get_close_matches(tk, SKILLS_LOWER.keys(), n=1, cutoff=0.75)
            if match:
                remember(match[0])

    # --- Embeddings Matching ---
    user_embedding = embed_model.encode(user_text, convert_to_tensor=True)
    cosine_scores = util.cos_sim(user_embedding, skill_embeddings)[0]
    # Label the rows with SKILLS rather than the module-level ``skills``:
    # the UI code rebinds ``skills`` to the detected skills, which would pair
    # the scores with the wrong names. Both lists are list(ROADMAPS.keys()).
    for skill, score in zip(SKILLS, cosine_scores):
        if float(score) >= threshold:
            remember(skill.lower())
    return found
# --- Merge multiple roadmaps ---
def merge_roadmaps(skills):
    """Combine the per-level steps of several skills into one roadmap.

    Skills missing from ``ROADMAPS`` are skipped. Within a level, a step text
    that was already listed (after stripping whitespace) is not repeated; it
    stays attributed to the first skill that supplied it.

    Returns:
        OrderedDict with the keys "Beginner", "Intermediate", "Advanced" and
        "Expert", each holding a list of "<step> β (<skill>)" strings.
    """
    combined = OrderedDict()
    for level_key in ("beginner", "intermediate", "advanced", "expert"):
        bucket = []
        combined[level_key.capitalize()] = bucket
        # De-duplication is per level: the same step may appear in two levels.
        already_listed = set()
        for name in skills:
            if name not in ROADMAPS:
                continue
            for raw_step in ROADMAPS[name].get(level_key, []):
                step = raw_step.strip()
                if not step or step in already_listed:
                    continue
                already_listed.add(step)
                bucket.append(f"{step} β ({name})")
    return combined
# --- Streamlit UI ---
# NOTE(review): the "β" and "π" characters in the UI strings below look like
# mis-decoded symbols (possibly an arrow and emoji) — confirm against the
# original file. They are reproduced unchanged here.
st.set_page_config(page_title="Skill β Roadmap", layout="wide")
st.title("Skill β Roadmap")


def _render_level(title, steps):
    """Write one roadmap level: a bold heading followed by its steps as bullets."""
    st.markdown(f"**{title}**")
    for step in steps:
        st.write(f"- {step}")


user_input = st.text_area(
    "Enter your skills (paragraph, comma-separated, or a sentence)",
    height=140,
    placeholder="e.g. I'm experienced in python, javascript, and figma",
)

if st.button("Generate roadmap"):
    # Use a dedicated name: the module-level ``skills`` list is built next to
    # skill_embeddings, and rebinding it here would leave the two out of step.
    detected_skills = extract_skills(user_input)
    domains = [
        ROADMAPS[s].get("domain", "").lower()
        for s in detected_skills
        if s in ROADMAPS
    ]
    st.markdown(f"π **Detected skills:** {', '.join(detected_skills) if detected_skills else 'None'}")

    # One merged roadmap is the default; only an explicit "separate" intent
    # switches to per-skill roadmaps.
    intent = detect_intent_spacy(user_input)
    want_single = intent != "separate"

    if want_single and not detected_skills:
        # Avoid rendering empty career and roadmap sections.
        st.write("No skills detected.")
    elif want_single:
        combined_career = suggest_combined_career(detected_skills, domains)
        if combined_career:
            st.subheader(f"π Hybrid Career Path: {combined_career}")
            roadmap = build_hybrid_roadmap(detected_skills, combined_career)
            for lvl, steps in roadmap.items():
                _render_level(lvl, steps)
        else:
            merged = merge_roadmaps(detected_skills)
            career_names = suggest_career_names(detected_skills)
            st.subheader("π Possible Career Paths:")
            if career_names:
                for c in career_names:
                    st.markdown(f"- {c}")
            else:
                st.write("No specific career paths found for these skills.")
            st.subheader("π Roadmap")
            for lvl, steps in merged.items():
                _render_level(lvl, steps)
    else:
        st.subheader("π Individual Skill Roadmaps")
        if not detected_skills:
            st.write("No skills detected.")
        for skill in detected_skills:
            if skill in ROADMAPS:
                st.markdown(f"### {skill} β {ROADMAPS[skill].get('domain','')}")
                for lvl, steps in ROADMAPS[skill].items():
                    # "domain" and "careers" are metadata, not roadmap levels.
                    if lvl in {"domain", "careers"}:
                        continue
                    _render_level(lvl.capitalize(), steps)
            else:
                st.warning(f"No roadmap found for {skill}")