adan012's picture
Create app.py
590c4d5 verified
import json
import re
from pathlib import Path
from difflib import get_close_matches
from collections import OrderedDict
import streamlit as st
import spacy # spaCy for intent detection
from sentence_transformers import SentenceTransformer, util
import torch
# --- Paths ---
ROADMAPS_PATH = Path("roadmaps_fixed.json")
SYNONYMS_PATH = Path("synonyms.json")
# --- Helpers ---
def load_json(path: Path):
if not path.exists():
return None
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def save_json(obj, path: Path):
with open(path, "w", encoding="utf-8") as f:
json.dump(obj, f, ensure_ascii=False, indent=2)
# --- Load Data ---
ROADMAPS = load_json(ROADMAPS_PATH)
if ROADMAPS is None:
st.error(f"Missing {ROADMAPS_PATH}. Put your roadmaps JSON in the app directory.")
st.stop()
SYNONYMS = load_json(SYNONYMS_PATH)
# --- Auto-generate synonyms if missing ---
def generate_synonyms_from_skills(skills):
synonyms = {}
for skill in skills:
norm = skill.strip()
low = norm.lower()
synonyms[low] = norm
no_nums = re.sub(r"\d+", "", low).strip()
if no_nums and no_nums != low:
synonyms[no_nums] = norm
if " " in low:
acronym = "".join(w[0] for w in low.split() if w and w[0].isalpha())
if 1 < len(acronym) <= 6:
synonyms[acronym] = norm
if "javascript" in low:
synonyms["js"] = norm
if "typescript" in low:
synonyms["ts"] = norm
if "python" in low:
synonyms["py"] = norm
if "postgresql" in low:
synonyms["postgres"] = norm
synonyms["pgsql"] = norm
if "mysql" in low:
synonyms["maria"] = norm
if "artificial intelligence" in low:
synonyms["ai"] = norm
if "machine learning" in low:
synonyms["ml"] = norm
if "natural language processing" in low:
synonyms["nlp"] = norm
if "computer vision" in low:
synonyms["cv"] = norm
return synonyms
if SYNONYMS is None:
SYNONYMS = generate_synonyms_from_skills(list(ROADMAPS.keys()))
save_json(SYNONYMS, SYNONYMS_PATH)
SKILLS = list(ROADMAPS.keys())
SKILLS_LOWER = {s.lower(): s for s in SKILLS}
SYN_LOWER = {k.lower(): v for k, v in SYNONYMS.items()}
STOPWORDS = {"in", "on", "at", "it", "an", "to", "by", "of", "for", "and", "or", "the", "a"}
# --- spaCy Intent Detection ---
nlp = spacy.load("en_core_web_sm")
def detect_intent_spacy(user_text: str) -> str:
if not user_text or not user_text.strip():
return "default"
doc = nlp(user_text.lower())
single_keywords = {"single", "one", "combined", "merge"}
separate_keywords = {"separate", "different", "individual", "each"}
for token in doc:
if token.text in single_keywords:
return "single"
if token.text in separate_keywords:
return "separate"
for chunk in doc.noun_chunks:
if "single roadmap" in chunk.text or "one roadmap" in chunk.text:
return "single"
if "separate" in chunk.text or "different" in chunk.text:
return "separate"
for token in doc:
if token.lemma_ in {"merge", "combine", "integrate"}:
return "single"
if token.lemma_ in {"split", "divide"}:
return "separate"
return "default"
# --- Career Path Logic ---
def suggest_career_names(skills):
careers = []
for skill in skills:
skill = skill.lower()
if skill in ROADMAPS and "careers" in ROADMAPS[skill]:
careers.extend(ROADMAPS[skill]["careers"])
return list(dict.fromkeys(careers)) # remove duplicates
def suggest_combined_career(skills, domains):
domains_lower = [d.lower() for d in domains]
# --- UI/UX + Development Hybrids ---
if "ui/ux design" in domains_lower and "frontend" in domains_lower:
return "Frontend Developer with Design Expertise"
if "ui/ux design" in domains_lower and "backend & web frameworks" in domains_lower:
return "Full-Stack Developer with UX Focus"
if "ui/ux design" in domains_lower and "programming languages" in domains_lower:
return "Design Technologist"
# --- UI/UX + AI Hybrids ---
if "ui/ux design" in domains_lower and "programming languages" in domains_lower and any("ai" in d for d in domains_lower):
return "AI-Driven Designer"
# --- Development + AI Hybrids ---
if "backend & web frameworks" in domains_lower and any("ai" in d for d in domains_lower):
return "AI-Enhanced Full-Stack Developer"
if "frontend" in domains_lower and any("ai" in d for d in domains_lower):
return "AI-Powered Frontend Engineer"
# --- Mobile + AI Hybrids ---
if "mobile development" in domains_lower and any("ai" in d for d in domains_lower):
return "AI-Powered Mobile Developer"
# --- Cloud/DevOps Hybrids ---
if "cloud computing" in domains_lower and "devops" in domains_lower:
return "Cloud DevOps Engineer"
# --- Web3 ---
if "blockchain development" in domains_lower and "web development" in domains_lower:
return "Web3 Developer"
# --- AI Research ---
if "ai" in domains_lower and "data science" in domains_lower:
return "AI Research Scientist"
# --- AI Infrastructure ---
if "ai" in domains_lower and "cloud computing" in domains_lower:
return "AI Infrastructure Engineer"
return None
# --- Hybrid Roadmap Builder (shortened for brevity) ---
def build_hybrid_roadmap(skills, career_name):
roadmap = OrderedDict()
roadmap["Beginner"] = ["Learn basics of each selected skill", "Project: simple hybrid prototype"]
roadmap["Intermediate"] = ["Combine skills into projects", "Project: hybrid application"]
roadmap["Advanced"] = ["Master frameworks across domains", "Project: large-scale hybrid system"]
roadmap["Expert"] = ["Lead innovation in hybrid domain", "Mentor others in hybrid specialization"]
return roadmap
# --- Embeddings Model ---
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
skills = list(ROADMAPS.keys())
skill_embeddings = embed_model.encode(skills, convert_to_tensor=True)
# --- Extraction Logic (merged) ---
def extract_skills(user_text: str, threshold: float = 0.6):
found = []
if not user_text or not user_text.strip():
return []
text_lower = user_text.lower()
# --- Keyword & Synonym Matching ---
for skill in SKILLS:
if " " in skill.lower() and skill.lower() in text_lower:
if skill.lower() not in found:
found.append(skill.lower())
tokens = re.split(r'[,\n; ]+', text_lower)
tokens = [tk.strip() for tk in tokens if tk.strip()]
for tk in tokens:
if not tk or tk in STOPWORDS:
continue
if tk in SYN_LOWER:
mapped = SYN_LOWER[tk].lower()
if mapped in ROADMAPS and mapped not in found:
found.append(mapped)
continue
if tk in SKILLS_LOWER:
mapped = SKILLS_LOWER[tk].lower()
if mapped not in found:
found.append(mapped)
continue
if len(tk) >= 3:
match = get_close_matches(tk, SKILLS_LOWER.keys(), n=1, cutoff=0.75)
if match:
mapped = SKILLS_LOWER[match[0]].lower()
if mapped not in found:
found.append(mapped)
# --- Embeddings Matching ---
user_embedding = embed_model.encode(user_text, convert_to_tensor=True)
cosine_scores = util.cos_sim(user_embedding, skill_embeddings)[0]
for skill, score in zip(skills, cosine_scores):
if float(score) >= threshold and skill.lower() not in found:
found.append(skill.lower())
return found
# --- Merge multiple roadmaps ---
def merge_roadmaps(skills):
levels = ["beginner", "intermediate", "advanced", "expert"]
merged = OrderedDict()
for lvl in levels:
merged[lvl.capitalize()] = []
seen = set()
for skill in skills:
if skill not in ROADMAPS:
continue
steps = ROADMAPS[skill].get(lvl, [])
for s in steps:
s_norm = s.strip()
if s_norm and s_norm not in seen:
merged[lvl.capitalize()].append(f"{s_norm} β€” ({skill})")
seen.add(s_norm)
return merged
# --- Streamlit UI ---
st.set_page_config(page_title="Skill β†’ Roadmap", layout="wide")
st.title("Skill β†’ Roadmap")
user_input = st.text_area(
"Enter your skills (paragraph, comma-separated, or a sentence)",
height=140,
placeholder="e.g. I'm experienced in python, javascript, and figma"
)
if st.button("Generate roadmap"):
skills = extract_skills(user_input)
domains = [ROADMAPS[s].get("domain", "").lower() for s in skills if s in ROADMAPS]
st.markdown(f"πŸ”Ž **Detected skills:** {', '.join(skills) if skills else 'None'}")
intent = detect_intent_spacy(user_input)
want_single = True if intent == "single" else False if intent == "separate" else True
if want_single:
combined_career = suggest_combined_career(skills, domains)
if combined_career:
st.subheader(f"🌍 Hybrid Career Path: {combined_career}")
roadmap = build_hybrid_roadmap(skills, combined_career)
for lvl, steps in roadmap.items():
st.markdown(f"**{lvl}**")
for step in steps:
st.write(f"- {step}")
else:
merged = merge_roadmaps(skills)
career_names = suggest_career_names(skills)
st.subheader("🌍 Possible Career Paths:")
if career_names:
for c in career_names:
st.markdown(f"- {c}")
else:
st.write("No specific career paths found for these skills.")
st.subheader("πŸ“˜ Roadmap")
for lvl, steps in merged.items():
st.markdown(f"**{lvl}**")
for step in steps:
st.write(f"- {step}")
else:
st.subheader("πŸ“˜ Individual Skill Roadmaps")
if not skills:
st.write("No skills detected.")
for skill in skills:
if skill in ROADMAPS:
st.markdown(f"### {skill} β€” {ROADMAPS[skill].get('domain','')}")
for lvl, steps in ROADMAPS[skill].items():
if lvl in {"domain", "careers"}:
continue
st.markdown(f"**{lvl.capitalize()}**")
for s in steps:
st.write(f"- {s}")
else:
st.warning(f"No roadmap found for {skill}")