Spaces:
Sleeping
Sleeping
changed the title
Browse files
app.py
CHANGED
|
@@ -1,58 +1,58 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import os, requests, asyncio
|
| 3 |
-
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
|
| 4 |
-
from crawl4ai.content_filter_strategy import PruningContentFilter
|
| 5 |
-
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
| 6 |
-
from openai import AzureOpenAI
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
-
load_dotenv()
|
| 9 |
-
client = AzureOpenAI(
|
| 10 |
-
api_key=os.getenv("AZURE_OPENAI_KEY").strip(),
|
| 11 |
-
api_version="2025-01-01-preview",
|
| 12 |
-
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT").strip()
|
| 16 |
-
SERPER_API_KEY = os.getenv("SERPER_API_KEY").strip()
|
| 17 |
-
|
| 18 |
-
def search_company_interviews(company):
|
| 19 |
-
headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
|
| 20 |
-
query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
|
| 21 |
-
r = requests.post("https://google.serper.dev/search", headers=headers, json={"q": query})
|
| 22 |
-
return [res["link"] for res in r.json().get("organic", [])[:3]]
|
| 23 |
-
|
| 24 |
-
async def crawl_url(url):
|
| 25 |
-
browser_conf = BrowserConfig(headless=True)
|
| 26 |
-
filter_strategy = PruningContentFilter()
|
| 27 |
-
md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
|
| 28 |
-
run_conf = CrawlerRunConfig(markdown_generator=md_gen)
|
| 29 |
-
|
| 30 |
-
async with AsyncWebCrawler(config=browser_conf) as crawler:
|
| 31 |
-
result = await crawler.arun(url=url, config=run_conf)
|
| 32 |
-
return result.markdown.fit_markdown or result.markdown.raw_markdown
|
| 33 |
-
|
| 34 |
-
async def fetch_and_summarize(company):
|
| 35 |
-
urls = search_company_interviews(company)
|
| 36 |
-
contents = []
|
| 37 |
-
for url in urls:
|
| 38 |
-
contents.append(await crawl_url(url))
|
| 39 |
-
context = "\n".join(contents)[:4000]
|
| 40 |
-
messages = [
|
| 41 |
-
{"role": "system", "content": "You summarize interview experiences for job seekers."},
|
| 42 |
-
{"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"}
|
| 43 |
-
]
|
| 44 |
-
response = client.chat.completions.create(model=DEPLOYMENT_NAME, messages=messages, max_tokens=800)
|
| 45 |
-
return response.choices[0].message.content
|
| 46 |
-
|
| 47 |
-
def get_interview_experience(company):
|
| 48 |
-
return asyncio.run(fetch_and_summarize(company))
|
| 49 |
-
|
| 50 |
-
with gr.Blocks() as demo:
|
| 51 |
-
gr.Markdown("## 💼 Interview
|
| 52 |
-
company = gr.Textbox(label="Company Name")
|
| 53 |
-
output = gr.Textbox(label="Interview Insights", lines=15)
|
| 54 |
-
btn = gr.Button("Fetch")
|
| 55 |
-
btn.click(get_interview_experience, inputs=[company], outputs=[output])
|
| 56 |
-
|
| 57 |
-
if __name__ == "__main__":
|
| 58 |
-
demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)
|
|
|
|
| 1 |
+
import gradio as gr
import os, requests, asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from openai import AzureOpenAI
from dotenv import load_dotenv

load_dotenv()


def _require_env(name):
    """Return the stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset. Previously a missing
            variable surfaced as an opaque ``AttributeError`` from
            ``None.strip()``; this names the variable instead.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value.strip()


# Azure OpenAI client used by fetch_and_summarize for the chat completion.
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)

DEPLOYMENT_NAME = _require_env("AZURE_OPENAI_DEPLOYMENT")
SERPER_API_KEY = _require_env("SERPER_API_KEY")
| 17 |
+
|
| 18 |
+
def search_company_interviews(company):
    """Search Serper (Google) for interview-experience pages about *company*.

    Returns:
        Up to 3 result URLs restricted to Glassdoor / GeeksforGeeks /
        PrepInsta, or an empty list when the search yields nothing.

    Raises:
        requests.HTTPError: on a non-2xx Serper API response.
        requests.Timeout: if Serper does not answer within the timeout.
    """
    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    query = (
        f"{company} interview experience "
        "site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
    )
    # Timeout so a stalled search call cannot hang the Gradio handler forever.
    r = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json={"q": query},
        timeout=15,
    )
    # Fail loudly on an API error instead of silently parsing an error body.
    r.raise_for_status()
    return [res["link"] for res in r.json().get("organic", [])[:3]]
|
| 23 |
+
|
| 24 |
+
async def crawl_url(url):
    """Crawl *url* and return its content as markdown.

    Prefers the pruned ("fit") markdown produced by the content filter,
    falling back to the raw markdown when the filter yields nothing.
    """
    markdown_generator = DefaultMarkdownGenerator(
        content_filter=PruningContentFilter()
    )
    run_config = CrawlerRunConfig(markdown_generator=markdown_generator)
    browser_config = BrowserConfig(headless=True)

    async with AsyncWebCrawler(config=browser_config) as crawler:
        crawl_result = await crawler.arun(url=url, config=run_config)
        return crawl_result.markdown.fit_markdown or crawl_result.markdown.raw_markdown
|
| 33 |
+
|
| 34 |
+
async def fetch_and_summarize(company):
    """Find, crawl, and summarize interview-experience pages for *company*.

    Returns:
        The model's summary text, or a short notice when the search
        returned no results.
    """
    urls = search_company_interviews(company)
    if not urls:
        # Nothing to summarize — don't ask the model about empty context.
        return f"No interview experiences found for {company!r}."

    # Crawl all result pages concurrently instead of one at a time;
    # gather() preserves input order, matching the original sequential loop.
    contents = await asyncio.gather(*(crawl_url(url) for url in urls))

    # Drop pages that yielded no markdown (crawl_url may return None), then
    # cap the prompt context to stay inside the model's input budget.
    context = "\n".join(c for c in contents if c)[:4000]
    messages = [
        {"role": "system", "content": "You summarize interview experiences for job seekers."},
        {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"},
    ]
    response = client.chat.completions.create(
        model=DEPLOYMENT_NAME, messages=messages, max_tokens=800
    )
    return response.choices[0].message.content
|
| 46 |
+
|
| 47 |
+
def get_interview_experience(company):
    """Synchronous Gradio entry point: run the async pipeline to completion."""
    summary = asyncio.run(fetch_and_summarize(company))
    return summary
|
| 49 |
+
|
| 50 |
+
# UI: a company-name input, a summary output, and a button wiring them
# through get_interview_experience.
with gr.Blocks() as demo:
    # Fix: title previously misspelled "Experience" as "Expericence".
    gr.Markdown("## 💼 Interview Process and Experience Finder")
    company = gr.Textbox(label="Company Name")
    output = gr.Textbox(label="Interview Insights", lines=15)
    btn = gr.Button("Fetch")
    btn.click(get_interview_experience, inputs=[company], outputs=[output])
|
| 56 |
+
|
| 57 |
+
# Run the app when executed directly. Binds on all interfaces so the server
# is reachable from outside a container; pwa=True enables Gradio's
# progressive-web-app mode (per the launch() options).
if __name__ == "__main__":
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)
|