PraneshJs committed on
Commit
cf5efab
·
verified ·
1 Parent(s): 36630ea

changed the title

Browse files
Files changed (1) hide show
  1. app.py +58 -58
app.py CHANGED
@@ -1,58 +1,58 @@
1
- import gradio as gr
2
- import os, requests, asyncio
3
- from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
4
- from crawl4ai.content_filter_strategy import PruningContentFilter
5
- from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
6
- from openai import AzureOpenAI
7
- from dotenv import load_dotenv
8
- load_dotenv()
9
- client = AzureOpenAI(
10
- api_key=os.getenv("AZURE_OPENAI_KEY").strip(),
11
- api_version="2025-01-01-preview",
12
- azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
13
- )
14
-
15
- DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT").strip()
16
- SERPER_API_KEY = os.getenv("SERPER_API_KEY").strip()
17
-
18
- def search_company_interviews(company):
19
- headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
20
- query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
21
- r = requests.post("https://google.serper.dev/search", headers=headers, json={"q": query})
22
- return [res["link"] for res in r.json().get("organic", [])[:3]]
23
-
24
- async def crawl_url(url):
25
- browser_conf = BrowserConfig(headless=True)
26
- filter_strategy = PruningContentFilter()
27
- md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
28
- run_conf = CrawlerRunConfig(markdown_generator=md_gen)
29
-
30
- async with AsyncWebCrawler(config=browser_conf) as crawler:
31
- result = await crawler.arun(url=url, config=run_conf)
32
- return result.markdown.fit_markdown or result.markdown.raw_markdown
33
-
34
- async def fetch_and_summarize(company):
35
- urls = search_company_interviews(company)
36
- contents = []
37
- for url in urls:
38
- contents.append(await crawl_url(url))
39
- context = "\n".join(contents)[:4000]
40
- messages = [
41
- {"role": "system", "content": "You summarize interview experiences for job seekers."},
42
- {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"}
43
- ]
44
- response = client.chat.completions.create(model=DEPLOYMENT_NAME, messages=messages, max_tokens=800)
45
- return response.choices[0].message.content
46
-
47
- def get_interview_experience(company):
48
- return asyncio.run(fetch_and_summarize(company))
49
-
50
- with gr.Blocks() as demo:
51
- gr.Markdown("## 💼 Interview Experience Finder")
52
- company = gr.Textbox(label="Company Name")
53
- output = gr.Textbox(label="Interview Insights", lines=15)
54
- btn = gr.Button("Fetch")
55
- btn.click(get_interview_experience, inputs=[company], outputs=[output])
56
-
57
- if __name__ == "__main__":
58
- demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)
 
1
import gradio as gr
import os, requests, asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from openai import AzureOpenAI
from dotenv import load_dotenv

load_dotenv()


def _require_env(name):
    """Return the stripped value of environment variable *name*.

    Fails fast with a readable error when the variable is unset. The previous
    code called ``.strip()`` directly on ``os.getenv(...)``, which raises an
    opaque ``AttributeError: 'NoneType' object has no attribute 'strip'``
    when a variable is missing.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value.strip()


# Azure OpenAI client used by fetch_and_summarize for the chat completion call.
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)

# Deployment name for the chat model and API key for the Serper search API.
DEPLOYMENT_NAME = _require_env("AZURE_OPENAI_DEPLOYMENT")
SERPER_API_KEY = _require_env("SERPER_API_KEY")
17
+
18
def search_company_interviews(company):
    """Return up to three URLs of interview-experience pages about *company*.

    Queries the Serper.dev Google Search API, restricting results to
    glassdoor.com, geeksforgeeks.org and prepinsta.com.

    Raises:
        requests.HTTPError: if the search API responds with an error status.
        requests.Timeout: if the API does not respond within the timeout.
    """
    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
    # A timeout keeps a stuck request from hanging the Gradio handler forever.
    r = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json={"q": query},
        timeout=15,
    )
    # Surface HTTP errors here instead of failing later with a confusing
    # KeyError when the error body lacks an "organic" field.
    r.raise_for_status()
    return [res["link"] for res in r.json().get("organic", [])[:3]]
23
+
24
async def crawl_url(url):
    """Crawl *url* and return its content rendered as markdown.

    Prefers the pruned ("fit") markdown when the content filter produced one,
    otherwise falls back to the raw markdown of the page.
    """
    run_config = CrawlerRunConfig(
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter()
        )
    )
    async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
        page = await crawler.arun(url=url, config=run_config)
        return page.markdown.fit_markdown or page.markdown.raw_markdown
33
+
34
async def fetch_and_summarize(company):
    """Search for interview-experience pages about *company*, crawl them, and
    summarize the combined content with Azure OpenAI.

    The pages are crawled concurrently (the original looped and awaited each
    URL one at a time, serializing independent I/O), and a URL that fails to
    crawl is skipped instead of aborting the whole request.

    Returns:
        str: the model's summary of the interview process.
    """
    urls = search_company_interviews(company)
    # Fan out all crawls at once; return_exceptions keeps one bad page from
    # cancelling the rest.
    results = await asyncio.gather(
        *(crawl_url(url) for url in urls), return_exceptions=True
    )
    contents = [r for r in results if isinstance(r, str) and r]
    # Truncate to keep the prompt within a safe size for the model.
    context = "\n".join(contents)[:4000]
    messages = [
        {"role": "system", "content": "You summarize interview experiences for job seekers."},
        {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"},
    ]
    response = client.chat.completions.create(
        model=DEPLOYMENT_NAME, messages=messages, max_tokens=800
    )
    return response.choices[0].message.content
46
+
47
def get_interview_experience(company):
    """Synchronous Gradio entry point.

    Drives the async search-crawl-summarize pipeline to completion and
    returns the summary text for display.
    """
    summary = asyncio.run(fetch_and_summarize(company))
    return summary
49
+
50
# Gradio UI: a single text input wired to the summarization pipeline.
with gr.Blocks() as demo:
    # Fixed typo in the user-facing title ("Expericence" -> "Experience").
    gr.Markdown("## 💼 Interview Process and Experience Finder")
    company = gr.Textbox(label="Company Name")
    output = gr.Textbox(label="Interview Insights", lines=15)
    btn = gr.Button("Fetch")
    btn.click(get_interview_experience, inputs=[company], outputs=[output])

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from inside a container
    # (e.g. a Hugging Face Space); port 7860 is the Spaces default.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)