PraneshJs committed on
Commit
cbde19b
·
verified ·
1 Parent(s): cf5efab

Added a personalized learning plan using GitHub and LeetCode data

Browse files
Files changed (1) hide show
  1. app.py +142 -24
app.py CHANGED
@@ -5,54 +5,172 @@ from crawl4ai.content_filter_strategy import PruningContentFilter
5
  from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
6
  from openai import AzureOpenAI
7
  from dotenv import load_dotenv
 
 
8
  load_dotenv()
 
 
 
 
 
 
 
9
  client = AzureOpenAI(
10
- api_key=os.getenv("AZURE_OPENAI_KEY").strip(),
11
  api_version="2025-01-01-preview",
12
- azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
13
  )
14
 
15
- DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT").strip()
16
- SERPER_API_KEY = os.getenv("SERPER_API_KEY").strip()
 
 
 
 
17
 
18
  def search_company_interviews(company):
19
- headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
20
- query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
21
- r = requests.post("https://google.serper.dev/search", headers=headers, json={"q": query})
 
 
 
 
 
 
 
 
 
 
 
 
22
  return [res["link"] for res in r.json().get("organic", [])[:3]]
23
 
24
  async def crawl_url(url):
25
  browser_conf = BrowserConfig(headless=True)
26
- filter_strategy = PruningContentFilter()
27
  md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
28
  run_conf = CrawlerRunConfig(markdown_generator=md_gen)
29
 
30
  async with AsyncWebCrawler(config=browser_conf) as crawler:
31
  result = await crawler.arun(url=url, config=run_conf)
32
- return result.markdown.fit_markdown or result.markdown.raw_markdown
33
 
34
  async def fetch_and_summarize(company):
35
  urls = search_company_interviews(company)
36
- contents = []
37
- for url in urls:
38
- contents.append(await crawl_url(url))
39
- context = "\n".join(contents)[:4000]
40
  messages = [
41
- {"role": "system", "content": "You summarize interview experiences for job seekers."},
42
- {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"}
 
 
 
 
 
 
 
 
 
43
  ]
44
- response = client.chat.completions.create(model=DEPLOYMENT_NAME, messages=messages, max_tokens=800)
45
- return response.choices[0].message.content
46
 
47
- def get_interview_experience(company):
48
- return asyncio.run(fetch_and_summarize(company))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  with gr.Blocks() as demo:
51
- gr.Markdown("## 💼 Interview Process and Expericence Finder")
52
- company = gr.Textbox(label="Company Name")
53
- output = gr.Textbox(label="Interview Insights", lines=15)
54
- btn = gr.Button("Fetch")
55
- btn.click(get_interview_experience, inputs=[company], outputs=[output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
  demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)
 
5
  from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
6
  from openai import AzureOpenAI
7
  from dotenv import load_dotenv
8
+
9
+ # ---------------- ENV ----------------
10
  load_dotenv()
11
+
12
def must_env(name):
    """Return the value of a required environment variable.

    Surrounding whitespace is removed so that a stray space or trailing
    newline in a secret does not end up inside an API key or endpoint URL.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The stripped, non-empty value of the variable.

    Raises:
        RuntimeError: If the variable is unset, empty, or only whitespace.
    """
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(f"Missing env var: {name}")
    return value
17
+
18
# Shared Azure OpenAI client. Required settings are validated at import time
# so a misconfigured deployment fails fast instead of on the first request.
client = AzureOpenAI(
    api_key=must_env("AZURE_OPENAI_KEY"),
    # Optional override; falls back to the version this app was written against.
    api_version=os.getenv("AZURE_OPENAI_API_VERSION") or "2025-01-01-preview",
    azure_endpoint=must_env("AZURE_OPENAI_ENDPOINT"),
)

# Name of the Azure OpenAI deployment used for every chat completion.
DEPLOYMENT_NAME = must_env("AZURE_OPENAI_DEPLOYMENT")
# API key for the Serper search endpoint.
SERPER_API_KEY = must_env("SERPER_API_KEY")
26
+
27
+ # =========================================================
28
+ # =============== INTERVIEW INSIGHTS MODULE ==============
29
+ # =========================================================
30
 
31
def search_company_interviews(company):
    """Search Serper for interview-experience pages about a company.

    The query is restricted to glassdoor.com, geeksforgeeks.org and
    prepinsta.com.

    Args:
        company: Company name as entered by the user.

    Returns:
        A list of at most three result URLs, in the order Serper returned them.

    Raises:
        requests.HTTPError: If Serper responds with an error status.
        requests.RequestException: On timeout or connection failure.
    """
    headers = {
        "X-API-KEY": SERPER_API_KEY,
        "Content-Type": "application/json",
    }
    query = (
        f"{(company or '').strip()} interview experience "
        "site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
    )
    response = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json={"q": query, "num": 5},
        timeout=15,
    )
    response.raise_for_status()

    # "organic" may be missing or null; entries without a link are skipped
    # rather than raising KeyError, so the first three usable links are kept.
    organic = response.json().get("organic") or []
    links = [item["link"] for item in organic if item.get("link")]
    return links[:3]
48
 
49
async def crawl_url(url):
    """Crawl a single page and return its pruned markdown.

    Args:
        url: Address of the page to crawl.

    Returns:
        The filtered ("fit") markdown truncated to 2500 characters, or an
        empty string when the crawl produced no markdown.
    """
    browser_conf = BrowserConfig(headless=True)
    filter_strategy = PruningContentFilter(threshold=0.48)
    md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
    run_conf = CrawlerRunConfig(markdown_generator=md_gen)

    async with AsyncWebCrawler(config=browser_conf) as crawler:
        result = await crawler.arun(url=url, config=run_conf)

    # A failed crawl may leave `markdown` unset; treat that as "no content"
    # instead of raising AttributeError.
    markdown = getattr(result, "markdown", None)
    if markdown is None:
        return ""
    return (getattr(markdown, "fit_markdown", None) or "")[:2500]
58
 
59
async def fetch_and_summarize(company):
    """Find, crawl and summarize interview experiences for a company.

    Args:
        company: Company name as entered by the user.

    Returns:
        The model's summary followed by the list of source URLs, or a short
        message when no pages or no usable content could be found.
    """
    loop = asyncio.get_running_loop()

    # The search uses a blocking HTTP client; run it in a worker thread so
    # the event loop stays free for other requests.
    urls = await loop.run_in_executor(None, search_company_interviews, company)
    if not urls:
        return f"No interview experiences found for {company}."

    # One page failing to crawl should not discard the others.
    crawled = await asyncio.gather(
        *(crawl_url(u) for u in urls),
        return_exceptions=True,
    )
    pages = [page for page in crawled if isinstance(page, str) and page.strip()]
    if not pages:
        return f"Could not extract interview content for {company}."

    context = "\n\n".join(pages)

    messages = [
        {"role": "system", "content": "Summarize interview experiences concisely."},
        {"role": "user", "content": f"""
Summarize interview process for {company}:
- Rounds
- Difficulty
- Topics asked
- Preparation tips

Content:
{context}
"""},
    ]

    # The OpenAI client call is synchronous as well; keep it off the loop.
    response = await loop.run_in_executor(
        None,
        lambda: client.chat.completions.create(
            model=DEPLOYMENT_NAME,
            messages=messages,
            max_tokens=700,
        ),
    )

    sources = "\n".join(f"- {u}" for u in urls)
    return f"{response.choices[0].message.content}\n\n🔗 Sources:\n{sources}"
87
+
88
+ # =========================================================
89
+ # ========== ADAPTIVE LEARNING ECOSYSTEM MODULE ===========
90
+ # =========================================================
91
+
92
def fetch_github_stats(username):
    """Fetch the "top languages" card for a GitHub user.

    Args:
        username: GitHub username.

    Returns:
        The first 2000 characters of the SVG card, or an empty string when
        the request fails or the service answers with an error status.
    """
    try:
        response = requests.get(
            "https://github-readme-stats-fast.vercel.app/api/top-langs/",
            # Passing params lets requests URL-encode the username, so odd
            # characters cannot alter the query string.
            params={"username": username, "layout": "compact"},
            timeout=10,
        )
    except requests.RequestException:
        return ""

    # An error page is not language data; do not pass it on to the prompt.
    if not response.ok:
        return ""
    return response.text[:2000]  # SVG summary
96
+
97
def fetch_leetcode_data(username):
    """Collect LeetCode statistics for a user from several endpoints.

    The lookup is best-effort: an endpoint that fails, times out, or returns
    a non-JSON body is skipped.

    Args:
        username: LeetCode username.

    Returns:
        A dict mapping "profile", "solved", "skill" and "progress" to the
        decoded JSON of each endpoint that answered successfully. Empty when
        the username is blank or nothing could be fetched.
    """
    from urllib.parse import quote

    username = (username or "").strip()
    if not username:
        return {}

    # Escape the username so it cannot change the request path.
    base = f"https://leetcode-api-vercel.vercel.app/{quote(username, safe='')}"
    endpoints = {
        "profile": "",
        "solved": "/solved",
        "skill": "/skill",
        "progress": "/progress",
    }

    data = {}
    for key, path in endpoints.items():
        try:
            response = requests.get(base + path, timeout=10)
            if response.ok:
                data[key] = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or invalid JSON: skip this endpoint only.
            continue
    return data
112
+
113
def generate_learning_plan(github_user, leetcode_user):
    """Generate a personalized learning roadmap from GitHub and LeetCode data.

    Args:
        github_user: GitHub username; may be blank.
        leetcode_user: LeetCode username; may be blank.

    Returns:
        The model's learning plan as text, or a short message when no
        username was given or no profile data could be fetched.
    """
    import json

    github_user = (github_user or "").strip()
    leetcode_user = (leetcode_user or "").strip()
    if not github_user and not leetcode_user:
        return "Please enter a GitHub or LeetCode username."

    github_data = fetch_github_stats(github_user) if github_user else ""
    leetcode_data = fetch_leetcode_data(leetcode_user) if leetcode_user else {}
    if not github_data and not leetcode_data:
        # With nothing to analyze the model could only invent a profile.
        return "Could not fetch any profile data for the given usernames."

    # The prompt labels this section as JSON, so serialize it as real JSON
    # rather than interpolating the Python dict repr.
    leetcode_json = json.dumps(leetcode_data, ensure_ascii=False)

    prompt = f"""
You are an adaptive learning ecosystem focused on India's job market.

GitHub language usage (SVG):
{github_data}

LeetCode performance (JSON):
{leetcode_json}

Tasks:
1. Infer aptitude level
2. Identify strong & weak skills
3. Suggest 3 suitable job roles in India
4. Create a 6-week adaptive learning roadmap
5. Recommend LeetCode topics to focus next

Be structured and practical.
"""

    resp = client.chat.completions.create(
        model=DEPLOYMENT_NAME,
        messages=[
            {"role": "system", "content": "Design personalized learning paths. Make it practical. and use only the provided data. give correct output within 900 words or below"},
            {"role": "user", "content": prompt},
        ],
        max_tokens=900,
    )

    # `content` can be None; always hand the textbox a string.
    return resp.choices[0].message.content or ""
146
+
147
+ # =========================================================
148
+ # ======================= UI =============================
149
+ # =========================================================
150
 
151
# Two-tab Gradio interface: interview summaries and learning roadmaps.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 AI Career Intelligence Platform")

    with gr.Tabs():

        # Tab 1: company name in, interview summary out.
        with gr.Tab("💼 Interview Insights"):
            company = gr.Textbox(
                label="Company Name",
                placeholder="Amazon, Infosys",
            )
            interview_output = gr.Textbox(
                lines=18,
                label="Interview Summary",
            )
            btn1 = gr.Button("Fetch Interview Experience")
            btn1.click(
                fn=fetch_and_summarize,
                inputs=company,
                outputs=interview_output,
            )

        # Tab 2: GitHub and LeetCode usernames in, learning plan out.
        with gr.Tab("🎓 Adaptive Learning Ecosystem"):
            github_user = gr.Textbox(label="GitHub Username")
            leetcode_user = gr.Textbox(label="LeetCode Username")
            learning_output = gr.Textbox(
                lines=20,
                label="Personalized Learning Plan",
            )
            btn2 = gr.Button("Generate Learning Roadmap")
            btn2.click(
                fn=generate_learning_plan,
                inputs=[github_user, leetcode_user],
                outputs=learning_output,
            )

if __name__ == "__main__":
    # Listen on all interfaces at port 7860, without a public share link.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "pwa": True,
    }
    demo.launch(**launch_options)