Spaces:
Sleeping
Sleeping
| import json | |
| import re | |
| from pathlib import Path | |
| from difflib import get_close_matches | |
| from collections import OrderedDict | |
| import streamlit as st | |
| import spacy # spaCy for intent detection | |
| from sentence_transformers import SentenceTransformer, util | |
| import torch | |
| # --- Paths --- | |
| ROADMAPS_PATH = Path("roadmaps_fixed.json") | |
| SYNONYMS_PATH = Path("synonyms.json") | |
| # --- Helpers --- | |
| def load_json(path: Path): | |
| if not path.exists(): | |
| return None | |
| with open(path, "r", encoding="utf-8") as f: | |
| return json.load(f) | |
| def save_json(obj, path: Path): | |
| with open(path, "w", encoding="utf-8") as f: | |
| json.dump(obj, f, ensure_ascii=False, indent=2) | |
| # --- Load Data --- | |
| ROADMAPS = load_json(ROADMAPS_PATH) | |
| if ROADMAPS is None: | |
| st.error(f"Missing {ROADMAPS_PATH}. Put your roadmaps JSON in the app directory.") | |
| st.stop() | |
| SYNONYMS = load_json(SYNONYMS_PATH) | |
| # --- Auto-generate synonyms if missing --- | |
| def generate_synonyms_from_skills(skills): | |
| synonyms = {} | |
| for skill in skills: | |
| norm = skill.strip() | |
| low = norm.lower() | |
| synonyms[low] = norm | |
| no_nums = re.sub(r"\d+", "", low).strip() | |
| if no_nums and no_nums != low: | |
| synonyms[no_nums] = norm | |
| if " " in low: | |
| acronym = "".join(w[0] for w in low.split() if w and w[0].isalpha()) | |
| if 1 < len(acronym) <= 6: | |
| synonyms[acronym] = norm | |
| if "javascript" in low: | |
| synonyms["js"] = norm | |
| if "typescript" in low: | |
| synonyms["ts"] = norm | |
| if "python" in low: | |
| synonyms["py"] = norm | |
| if "postgresql" in low: | |
| synonyms["postgres"] = norm | |
| synonyms["pgsql"] = norm | |
| if "mysql" in low: | |
| synonyms["maria"] = norm | |
| if "artificial intelligence" in low: | |
| synonyms["ai"] = norm | |
| if "machine learning" in low: | |
| synonyms["ml"] = norm | |
| if "natural language processing" in low: | |
| synonyms["nlp"] = norm | |
| if "computer vision" in low: | |
| synonyms["cv"] = norm | |
| return synonyms | |
| if SYNONYMS is None: | |
| SYNONYMS = generate_synonyms_from_skills(list(ROADMAPS.keys())) | |
| save_json(SYNONYMS, SYNONYMS_PATH) | |
| SKILLS = list(ROADMAPS.keys()) | |
| SKILLS_LOWER = {s.lower(): s for s in SKILLS} | |
| SYN_LOWER = {k.lower(): v for k, v in SYNONYMS.items()} | |
| STOPWORDS = {"in", "on", "at", "it", "an", "to", "by", "of", "for", "and", "or", "the", "a"} | |
| # --- spaCy Intent Detection --- | |
| nlp = spacy.load("en_core_web_sm") | |
| def detect_intent_spacy(user_text: str) -> str: | |
| if not user_text or not user_text.strip(): | |
| return "default" | |
| doc = nlp(user_text.lower()) | |
| single_keywords = {"single", "one", "combined", "merge"} | |
| separate_keywords = {"separate", "different", "individual", "each"} | |
| for token in doc: | |
| if token.text in single_keywords: | |
| return "single" | |
| if token.text in separate_keywords: | |
| return "separate" | |
| for chunk in doc.noun_chunks: | |
| if "single roadmap" in chunk.text or "one roadmap" in chunk.text: | |
| return "single" | |
| if "separate" in chunk.text or "different" in chunk.text: | |
| return "separate" | |
| for token in doc: | |
| if token.lemma_ in {"merge", "combine", "integrate"}: | |
| return "single" | |
| if token.lemma_ in {"split", "divide"}: | |
| return "separate" | |
| return "default" | |
| # --- Career Path Logic --- | |
| def suggest_career_names(skills): | |
| careers = [] | |
| for skill in skills: | |
| skill = skill.lower() | |
| if skill in ROADMAPS and "careers" in ROADMAPS[skill]: | |
| careers.extend(ROADMAPS[skill]["careers"]) | |
| return list(dict.fromkeys(careers)) # remove duplicates | |
| def suggest_combined_career(skills, domains): | |
| domains_lower = [d.lower() for d in domains] | |
| # --- UI/UX + Development Hybrids --- | |
| if "ui/ux design" in domains_lower and "frontend" in domains_lower: | |
| return "Frontend Developer with Design Expertise" | |
| if "ui/ux design" in domains_lower and "backend & web frameworks" in domains_lower: | |
| return "Full-Stack Developer with UX Focus" | |
| if "ui/ux design" in domains_lower and "programming languages" in domains_lower: | |
| return "Design Technologist" | |
| # --- UI/UX + AI Hybrids --- | |
| if "ui/ux design" in domains_lower and "programming languages" in domains_lower and any("ai" in d for d in domains_lower): | |
| return "AI-Driven Designer" | |
| # --- Development + AI Hybrids --- | |
| if "backend & web frameworks" in domains_lower and any("ai" in d for d in domains_lower): | |
| return "AI-Enhanced Full-Stack Developer" | |
| if "frontend" in domains_lower and any("ai" in d for d in domains_lower): | |
| return "AI-Powered Frontend Engineer" | |
| # --- Mobile + AI Hybrids --- | |
| if "mobile development" in domains_lower and any("ai" in d for d in domains_lower): | |
| return "AI-Powered Mobile Developer" | |
| # --- Cloud/DevOps Hybrids --- | |
| if "cloud computing" in domains_lower and "devops" in domains_lower: | |
| return "Cloud DevOps Engineer" | |
| # --- Web3 --- | |
| if "blockchain development" in domains_lower and "web development" in domains_lower: | |
| return "Web3 Developer" | |
| # --- AI Research --- | |
| if "ai" in domains_lower and "data science" in domains_lower: | |
| return "AI Research Scientist" | |
| # --- AI Infrastructure --- | |
| if "ai" in domains_lower and "cloud computing" in domains_lower: | |
| return "AI Infrastructure Engineer" | |
| return None | |
| # --- Hybrid Roadmap Builder (shortened for brevity) --- | |
| def build_hybrid_roadmap(skills, career_name): | |
| roadmap = OrderedDict() | |
| roadmap["Beginner"] = ["Learn basics of each selected skill", "Project: simple hybrid prototype"] | |
| roadmap["Intermediate"] = ["Combine skills into projects", "Project: hybrid application"] | |
| roadmap["Advanced"] = ["Master frameworks across domains", "Project: large-scale hybrid system"] | |
| roadmap["Expert"] = ["Lead innovation in hybrid domain", "Mentor others in hybrid specialization"] | |
| return roadmap | |
| # --- Embeddings Model --- | |
| embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") | |
| skills = list(ROADMAPS.keys()) | |
| skill_embeddings = embed_model.encode(skills, convert_to_tensor=True) | |
| # --- Extraction Logic (merged) --- | |
| def extract_skills(user_text: str, threshold: float = 0.6): | |
| found = [] | |
| if not user_text or not user_text.strip(): | |
| return [] | |
| text_lower = user_text.lower() | |
| # --- Keyword & Synonym Matching --- | |
| for skill in SKILLS: | |
| if " " in skill.lower() and skill.lower() in text_lower: | |
| if skill.lower() not in found: | |
| found.append(skill.lower()) | |
| tokens = re.split(r'[,\n; ]+', text_lower) | |
| tokens = [tk.strip() for tk in tokens if tk.strip()] | |
| for tk in tokens: | |
| if not tk or tk in STOPWORDS: | |
| continue | |
| if tk in SYN_LOWER: | |
| mapped = SYN_LOWER[tk].lower() | |
| if mapped in ROADMAPS and mapped not in found: | |
| found.append(mapped) | |
| continue | |
| if tk in SKILLS_LOWER: | |
| mapped = SKILLS_LOWER[tk].lower() | |
| if mapped not in found: | |
| found.append(mapped) | |
| continue | |
| if len(tk) >= 3: | |
| match = get_close_matches(tk, SKILLS_LOWER.keys(), n=1, cutoff=0.75) | |
| if match: | |
| mapped = SKILLS_LOWER[match[0]].lower() | |
| if mapped not in found: | |
| found.append(mapped) | |
| # --- Embeddings Matching --- | |
| user_embedding = embed_model.encode(user_text, convert_to_tensor=True) | |
| cosine_scores = util.cos_sim(user_embedding, skill_embeddings)[0] | |
| for skill, score in zip(skills, cosine_scores): | |
| if float(score) >= threshold and skill.lower() not in found: | |
| found.append(skill.lower()) | |
| return found | |
| # --- Merge multiple roadmaps --- | |
| def merge_roadmaps(skills): | |
| levels = ["beginner", "intermediate", "advanced", "expert"] | |
| merged = OrderedDict() | |
| for lvl in levels: | |
| merged[lvl.capitalize()] = [] | |
| seen = set() | |
| for skill in skills: | |
| if skill not in ROADMAPS: | |
| continue | |
| steps = ROADMAPS[skill].get(lvl, []) | |
| for s in steps: | |
| s_norm = s.strip() | |
| if s_norm and s_norm not in seen: | |
| merged[lvl.capitalize()].append(f"{s_norm} β ({skill})") | |
| seen.add(s_norm) | |
| return merged | |
| # --- Streamlit UI --- | |
| st.set_page_config(page_title="Skill β Roadmap", layout="wide") | |
| st.title("Skill β Roadmap") | |
| user_input = st.text_area( | |
| "Enter your skills (paragraph, comma-separated, or a sentence)", | |
| height=140, | |
| placeholder="e.g. I'm experienced in python, javascript, and figma" | |
| ) | |
| if st.button("Generate roadmap"): | |
| skills = extract_skills(user_input) | |
| domains = [ROADMAPS[s].get("domain", "").lower() for s in skills if s in ROADMAPS] | |
| st.markdown(f"π **Detected skills:** {', '.join(skills) if skills else 'None'}") | |
| intent = detect_intent_spacy(user_input) | |
| want_single = True if intent == "single" else False if intent == "separate" else True | |
| if want_single: | |
| combined_career = suggest_combined_career(skills, domains) | |
| if combined_career: | |
| st.subheader(f"π Hybrid Career Path: {combined_career}") | |
| roadmap = build_hybrid_roadmap(skills, combined_career) | |
| for lvl, steps in roadmap.items(): | |
| st.markdown(f"**{lvl}**") | |
| for step in steps: | |
| st.write(f"- {step}") | |
| else: | |
| merged = merge_roadmaps(skills) | |
| career_names = suggest_career_names(skills) | |
| st.subheader("π Possible Career Paths:") | |
| if career_names: | |
| for c in career_names: | |
| st.markdown(f"- {c}") | |
| else: | |
| st.write("No specific career paths found for these skills.") | |
| st.subheader("π Roadmap") | |
| for lvl, steps in merged.items(): | |
| st.markdown(f"**{lvl}**") | |
| for step in steps: | |
| st.write(f"- {step}") | |
| else: | |
| st.subheader("π Individual Skill Roadmaps") | |
| if not skills: | |
| st.write("No skills detected.") | |
| for skill in skills: | |
| if skill in ROADMAPS: | |
| st.markdown(f"### {skill} β {ROADMAPS[skill].get('domain','')}") | |
| for lvl, steps in ROADMAPS[skill].items(): | |
| if lvl in {"domain", "careers"}: | |
| continue | |
| st.markdown(f"**{lvl.capitalize()}**") | |
| for s in steps: | |
| st.write(f"- {s}") | |
| else: | |
| st.warning(f"No roadmap found for {skill}") | |