adan012 committed on
Commit
bc45d96
·
verified ·
1 Parent(s): 7f78599

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +303 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,305 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
 
 
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ import json
2
+ import re
3
+ from pathlib import Path
4
+ from difflib import get_close_matches
5
+ from collections import OrderedDict
6
+
7
  import streamlit as st
8
+ import spacy # spaCy for intent detection
9
+ from sentence_transformers import SentenceTransformer, util
10
+ import torch
11
+
12
+ # --- Paths ---
13
# Both paths are relative, so they resolve against the process working directory.
ROADMAPS_PATH = Path("roadmaps_fixed.json")  # required: the app stops if it is missing
SYNONYMS_PATH = Path("synonyms.json")  # optional: generated from the skill names if missing
15
+
16
+ # --- Helpers ---
17
def load_json(path: Path):
    """Read and parse the UTF-8 JSON file at *path*.

    Returns the decoded object, or ``None`` when the file does not exist.
    Malformed JSON is not handled here; the ``json`` module's decode error
    propagates to the caller.
    """
    if path.exists():
        with open(path, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())
    return None
22
+
23
def save_json(obj, path: Path):
    """Write *obj* to *path* as pretty-printed UTF-8 JSON.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``) and the
    output is indented by two spaces. An existing file is overwritten.

    Raises:
        TypeError: if *obj* contains a value ``json`` cannot serialise. In
            that case *path* is left untouched.
    """
    # Serialise first: opening the file in "w" mode truncates it, so doing the
    # encoding up front keeps an existing file intact when *obj* is invalid.
    payload = json.dumps(obj, ensure_ascii=False, indent=2)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(payload)
26
+
27
+ # --- Load Data ---
28
# The roadmaps file is mandatory: when it is absent, report the problem in the
# page and halt this script run.
ROADMAPS = load_json(ROADMAPS_PATH)
if ROADMAPS is None:
    st.error(f"Missing {ROADMAPS_PATH}. Put your roadmaps JSON in the app directory.")
    st.stop()

# The synonyms file is optional; None here means it was not found on disk.
SYNONYMS = load_json(SYNONYMS_PATH)
34
+
35
+ # --- Auto-generate synonyms if missing ---
36
# Substring of a lower-cased skill name -> short aliases that should map to it.
_SKILL_KEYWORD_ALIASES = (
    ("javascript", ("js",)),
    ("typescript", ("ts",)),
    ("python", ("py",)),
    ("postgresql", ("postgres", "pgsql")),
    ("mysql", ("maria",)),
    ("artificial intelligence", ("ai",)),
    ("machine learning", ("ml",)),
    ("natural language processing", ("nlp",)),
    ("computer vision", ("cv",)),
)


def generate_synonyms_from_skills(skills):
    """Build a lower-cased ``alias -> skill name`` map from *skills*.

    For every skill the map contains:

    * the lower-cased name itself;
    * the name with digits removed (``"html5"`` -> ``"html"``);
    * for multi-word names, the acronym of the words' first letters when it
      is 2-6 characters long;
    * the hand-picked short forms in ``_SKILL_KEYWORD_ALIASES``.

    A derived alias never replaces another skill's exact name: with skills
    ``"ai"`` and ``"artificial intelligence"``, the key ``"ai"`` keeps
    pointing at ``"ai"``. When two skills derive the same alias, the later
    skill wins.

    Args:
        skills: iterable of skill names.

    Returns:
        dict mapping each alias (lower case) to the stripped skill name.
    """
    canonical = [name.strip() for name in skills]
    exact_names = {name.lower() for name in canonical}
    synonyms = {}

    def add_alias(alias, target):
        # Exact skill names take priority over anything derived.
        if alias not in exact_names:
            synonyms[alias] = target

    for norm in canonical:
        if not norm:
            continue  # a blank name would only produce an empty-string key
        low = norm.lower()
        synonyms[low] = norm

        no_nums = re.sub(r"\d+", "", low).strip()
        if no_nums and no_nums != low:
            add_alias(no_nums, norm)

        if " " in low:
            acronym = "".join(word[0] for word in low.split() if word[0].isalpha())
            if 1 < len(acronym) <= 6:
                add_alias(acronym, norm)

        for keyword, aliases in _SKILL_KEYWORD_ALIASES:
            if keyword in low:
                for alias in aliases:
                    add_alias(alias, norm)

    return synonyms
69
+
70
# No synonyms file on disk: derive the alias map from the skill names and
# write it out so the next run can load it.
if SYNONYMS is None:
    SYNONYMS = generate_synonyms_from_skills(list(ROADMAPS))
    save_json(SYNONYMS, SYNONYMS_PATH)

# Skill names in roadmaps-file order, plus case-insensitive lookup tables.
SKILLS = list(ROADMAPS)
SKILLS_LOWER = {name.lower(): name for name in SKILLS}
SYN_LOWER = {alias.lower(): target for alias, target in SYNONYMS.items()}

# Tokens that are never treated as a skill or alias during extraction.
STOPWORDS = set("in on at it an to by of for and or the a".split())
79
+
80
+ # --- spaCy Intent Detection ---
81
@st.cache_resource(show_spinner=False)
def _load_spacy_model():
    """Load the small English spaCy pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline avoids reloading it from disk each time. The spinner is
    disabled so that nothing is rendered before ``st.set_page_config``.
    NOTE(review): cached resources are shared between sessions - confirm the
    pipeline is only used for read-only inference.
    """
    return spacy.load("en_core_web_sm")


nlp = _load_spacy_model()
82
+
83
def detect_intent_spacy(user_text: str) -> str:
    """Decide whether the user asked for one combined roadmap or separate ones.

    Returns ``"single"``, ``"separate"`` or ``"default"`` (blank input, or no
    cue found). Three passes run in order and the first hit wins: literal
    token text, noun-chunk phrases, then token lemmas.
    """
    if not (user_text and user_text.strip()):
        return "default"

    parsed = nlp(user_text.lower())

    # Pass 1: literal cue words.
    word_cues = {
        "single": "single",
        "one": "single",
        "combined": "single",
        "merge": "single",
        "separate": "separate",
        "different": "separate",
        "individual": "separate",
        "each": "separate",
    }
    for tok in parsed:
        verdict = word_cues.get(tok.text)
        if verdict is not None:
            return verdict

    # Pass 2: phrases inside noun chunks.
    single_phrases = ("single roadmap", "one roadmap")
    separate_phrases = ("separate", "different")
    for chunk in parsed.noun_chunks:
        phrase = chunk.text
        if any(cue in phrase for cue in single_phrases):
            return "single"
        if any(cue in phrase for cue in separate_phrases):
            return "separate"

    # Pass 3: lemmas, so inflected verbs such as "merged" are recognised.
    lemma_cues = {
        "merge": "single",
        "combine": "single",
        "integrate": "single",
        "split": "separate",
        "divide": "separate",
    }
    for tok in parsed:
        verdict = lemma_cues.get(tok.lemma_)
        if verdict is not None:
            return verdict

    return "default"
110
+
111
+ # --- Career Path Logic ---
112
def suggest_career_names(skills):
    """Collect the ``"careers"`` entries of every known skill, without repeats.

    Each skill name is lower-cased before it is looked up in ``ROADMAPS``;
    unknown skills and skills without a ``"careers"`` entry are skipped.
    Careers are returned in first-seen order.
    """
    ordered_unique = {}
    for raw_name in skills:
        key = raw_name.lower()
        if key in ROADMAPS and "careers" in ROADMAPS[key]:
            for career in ROADMAPS[key]["careers"]:
                # dict keys keep insertion order, which drops later duplicates
                ordered_unique.setdefault(career, None)
    return list(ordered_unique)
119
+
120
# Ordered rules: (domains that must all be present, needs an AI domain, title).
# The first matching rule wins, so more specific rules precede generic ones.
_COMBINED_CAREER_RULES = (
    # UI/UX + development hybrids
    (("ui/ux design", "frontend"), False, "Frontend Developer with Design Expertise"),
    (("ui/ux design", "backend & web frameworks"), False, "Full-Stack Developer with UX Focus"),
    # UI/UX + AI must be tested before the plain "Design Technologist" rule,
    # otherwise it can never match.
    (("ui/ux design", "programming languages"), True, "AI-Driven Designer"),
    (("ui/ux design", "programming languages"), False, "Design Technologist"),
    # Development + AI hybrids
    (("backend & web frameworks",), True, "AI-Enhanced Full-Stack Developer"),
    (("frontend",), True, "AI-Powered Frontend Engineer"),
    # Mobile + AI
    (("mobile development",), True, "AI-Powered Mobile Developer"),
    # Cloud / DevOps
    (("cloud computing", "devops"), False, "Cloud DevOps Engineer"),
    # Web3
    (("blockchain development", "web development"), False, "Web3 Developer"),
    # AI research / infrastructure
    (("data science",), True, "AI Research Scientist"),
    (("cloud computing",), True, "AI Infrastructure Engineer"),
)

# "ai" as a whole word (or the spelled-out phrase). A bare substring test
# would also match unrelated names such as "blockchain development".
_AI_DOMAIN_RE = re.compile(r"\b(?:ai|artificial intelligence)\b")


def suggest_combined_career(skills, domains):
    """Return a hybrid career title for a combination of domains, if any.

    Args:
        skills: detected skill names. Currently unused; kept so existing
            callers do not need to change.
        domains: domain names of the detected skills (any case).

    Returns:
        The title of the first rule in ``_COMBINED_CAREER_RULES`` whose
        domains are all present (and, where required, an AI domain is
        present), or ``None`` when no rule matches.
    """
    present = {domain.lower() for domain in domains}
    has_ai = any(_AI_DOMAIN_RE.search(domain) for domain in present)

    for required, needs_ai, title in _COMBINED_CAREER_RULES:
        if needs_ai and not has_ai:
            continue
        if all(name in present for name in required):
            return title
    return None
162
+
163
+ # --- Hybrid Roadmap Builder (shortened for brevity) ---
164
def build_hybrid_roadmap(skills, career_name):
    """Return the generic four-level roadmap used for hybrid careers.

    Neither argument influences the result: every call yields a fresh
    ``OrderedDict`` with the same Beginner -> Expert template.
    """
    return OrderedDict(
        (
            ("Beginner", ["Learn basics of each selected skill", "Project: simple hybrid prototype"]),
            ("Intermediate", ["Combine skills into projects", "Project: hybrid application"]),
            ("Advanced", ["Master frameworks across domains", "Project: large-scale hybrid system"]),
            ("Expert", ["Lead innovation in hybrid domain", "Mentor others in hybrid specialization"]),
        )
    )
171
+
172
+ # --- Embeddings Model ---
173
@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes this script on every widget interaction, so an
    uncached load would repeat on each click. The spinner is disabled so that
    nothing is rendered before ``st.set_page_config``.
    """
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


@st.cache_resource(show_spinner=False)
def _encode_skill_names(skill_names):
    """Embed *skill_names* (a tuple of strings); cached per distinct tuple."""
    return _load_embedding_model().encode(list(skill_names), convert_to_tensor=True)


embed_model = _load_embedding_model()
skills = list(ROADMAPS.keys())
skill_embeddings = _encode_skill_names(tuple(skills))
176
+
177
+ # --- Extraction Logic (merged) ---
178
def extract_skills(user_text: str, threshold: float = 0.6):
    """Detect known skills mentioned in free-form text.

    Matching runs in this order; each skill is reported once, in the order
    it is first found:

    1. multi-word skill names that occur as a substring of the text;
    2. per token: synonym lookup, exact skill name, then a fuzzy match
       (``difflib``, cutoff 0.75) for tokens of three or more characters;
    3. skills whose embedding has cosine similarity >= *threshold* with the
       embedding of the whole text.

    Args:
        user_text: the raw user input; ``None`` or blank yields ``[]``.
        threshold: minimum cosine similarity for the embedding pass.

    Returns:
        list of lower-cased skill names. NOTE(review): callers look these up
        in ``ROADMAPS`` directly, which presumes its keys are lower case -
        confirm against the data file.
    """
    if not user_text or not user_text.strip():
        return []

    found = []

    def remember(name):
        if name not in found:
            found.append(name)

    text_lower = user_text.lower()

    # --- Multi-word skills cannot survive tokenisation, so match them first.
    for skill in SKILLS:
        low = skill.lower()
        if " " in low and low in text_lower:
            remember(low)

    # --- Keyword & synonym matching on single tokens.
    for raw in re.split(r"[,;\s]+", text_lower):
        if raw in SYN_LOWER or raw in SKILLS_LOWER:
            tk = raw
        else:
            # Drop sentence punctuation ("figma." / "(python)") so the exact
            # lookups below can hit. Leading dots are kept for names like ".net".
            tk = raw.rstrip(".!?:)").lstrip("(")
        if not tk or tk in STOPWORDS:
            continue

        if tk in SYN_LOWER:
            mapped = SYN_LOWER[tk].lower()
            if mapped in ROADMAPS:
                remember(mapped)
            continue

        if tk in SKILLS_LOWER:
            remember(SKILLS_LOWER[tk].lower())
            continue

        if len(tk) >= 3:
            match = get_close_matches(tk, SKILLS_LOWER.keys(), n=1, cutoff=0.75)
            if match:
                remember(SKILLS_LOWER[match[0]].lower())

    # --- Embeddings matching. SKILLS is used rather than the module-level
    # `skills`, which the UI code rebinds to the detected skills.
    # skill_embeddings was built from list(ROADMAPS.keys()), the same order.
    user_embedding = embed_model.encode(user_text, convert_to_tensor=True)
    cosine_scores = util.cos_sim(user_embedding, skill_embeddings)[0]
    for skill, score in zip(SKILLS, cosine_scores):
        if float(score) >= threshold:
            remember(skill.lower())

    return found
227
+
228
+ # --- Merge multiple roadmaps ---
229
def merge_roadmaps(skills):
    """Merge the roadmaps of several skills into one level-by-level plan.

    Args:
        skills: skill names; names that are not keys of ``ROADMAPS`` are
            ignored.

    Returns:
        ``OrderedDict`` with the keys ``Beginner``, ``Intermediate``,
        ``Advanced`` and ``Expert``. Each value lists that level's steps as
        ``"<step> — (<skill>)"``; within a level, a step text that repeats
        is kept only for the first skill that has it.
    """
    merged = OrderedDict()
    for lvl in ("beginner", "intermediate", "advanced", "expert"):
        bucket = []
        merged[lvl.capitalize()] = bucket
        seen = set()  # de-duplication is per level, not global
        for skill in skills:
            if skill not in ROADMAPS:
                continue
            # `or []` also covers a level that is present but null in the JSON.
            for step in ROADMAPS[skill].get(lvl) or []:
                text = step.strip()
                if text and text not in seen:
                    seen.add(text)
                    bucket.append(f"{text} — ({skill})")
    return merged
245
+
246
+ # --- Streamlit UI ---
247
def _render_levels(levels):
    """Render each level name in bold, followed by its steps as bullet lines."""
    for level_name, steps in levels.items():
        st.markdown(f"**{level_name}**")
        for step in steps:
            st.write(f"- {step}")


def _render_single_roadmap(detected, domains):
    """Show one roadmap for all detected skills (hybrid career or merged)."""
    combined_career = suggest_combined_career(detected, domains)
    if combined_career:
        st.subheader(f"🌍 Hybrid Career Path: {combined_career}")
        _render_levels(build_hybrid_roadmap(detected, combined_career))
        return

    st.subheader("🌍 Possible Career Paths:")
    career_names = suggest_career_names(detected)
    if career_names:
        for career in career_names:
            st.markdown(f"- {career}")
    else:
        st.write("No specific career paths found for these skills.")

    st.subheader("📘 Roadmap")
    _render_levels(merge_roadmaps(detected))


def _render_separate_roadmaps(detected):
    """Show each detected skill's own roadmap under its own heading."""
    st.subheader("📘 Individual Skill Roadmaps")
    if not detected:
        st.write("No skills detected.")
    for skill in detected:
        if skill not in ROADMAPS:
            st.warning(f"No roadmap found for {skill}")
            continue
        st.markdown(f"### {skill} — {ROADMAPS[skill].get('domain', '')}")
        for lvl, steps in ROADMAPS[skill].items():
            if lvl in {"domain", "careers"}:
                continue  # metadata entries, not roadmap levels
            st.markdown(f"**{lvl.capitalize()}**")
            for step in steps:
                st.write(f"- {step}")


st.set_page_config(page_title="Skill → Roadmap", layout="wide")
st.title("Skill → Roadmap")

user_input = st.text_area(
    "Enter your skills (paragraph, comma-separated, or a sentence)",
    height=140,
    placeholder="e.g. I'm experienced in python, javascript, and figma",
)

if st.button("Generate roadmap"):
    # A distinct name: rebinding the module-level `skills` would desynchronise
    # it from `skill_embeddings`.
    detected_skills = extract_skills(user_input)
    domains = [
        ROADMAPS[s].get("domain", "").lower() for s in detected_skills if s in ROADMAPS
    ]

    st.markdown(
        f"🔎 **Detected skills:** {', '.join(detected_skills) if detected_skills else 'None'}"
    )

    # Both "single" and "default" produce one combined roadmap.
    intent = detect_intent_spacy(user_input)
    if intent == "separate":
        _render_separate_roadmaps(detected_skills)
    elif not detected_skills:
        # Nothing to merge: say so instead of rendering four empty levels.
        st.write("No skills detected.")
    else:
        _render_single_roadmap(detected_skills, domains)