Spaces:
Paused
Paused
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel, Field | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.prompts import ChatPromptTemplate | |
| import json | |
| from firecrawl import FirecrawlApp | |
| import gspread | |
| import os | |
| from dotenv import load_dotenv | |
| import json | |
# Populate the process environment from a .env file via python-dotenv.
load_dotenv()

# API keys and the spreadsheet key are read from the environment.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET")

# The service-account credentials are supplied as a JSON document in the
# "cred_dict" environment variable and parsed into a dict.
cred_dic = os.getenv("cred_dict")
if cred_dic is None:
    # json.loads(None) would otherwise fail with an opaque TypeError.
    raise RuntimeError(
        'Environment variable "cred_dict" is not set; it must contain the '
        "service-account credentials as a JSON string."
    )
cred_dict = json.loads(cred_dic)
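| # Set up the Google Sheets connection from the service-account credentials; the spreadsheet is selected by the SHEET environment variable and the worksheet by its title. | |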
| # genai ={ | |
| # "type": str(os.getenv("type")), | |
| # "project_id": str(os.getenv("project_id")), | |
| # "private_key_id": str(os.getenv("private_key_id")), | |
| # "private_key": str(os.getenv("private_key")), | |
| # "client_email": str(os.getenv("client_email")), | |
| # "client_id": str(os.getenv("client_id")), | |
| # "auth_uri": str(os.getenv("auth_uri")), | |
| # "token_uri": str(os.getenv("token_uri")), | |
| # "auth_provider_x509_cert_url": str(os.getenv("auth_provider_x509_cert_url")), | |
| # "client_x509_cert_url": str(os.getenv("client_x509_cert_url")), | |
| # "universe_domain": str(os.getenv("universe_domain")) | |
| # } | |
# Authenticate with the service-account credentials and open the target
# worksheet once at import time; fastapi_func appends rows to it.
_WORKSHEET_TITLE = "S1"

gc = gspread.service_account_from_dict(cred_dict)
# The spreadsheet is looked up by its key (SHEET_ID), not by its name.
sh = gc.open_by_key(SHEET_ID)
worksheet = sh.worksheet(_WORKSHEET_TITLE)
| # Define your URL scraping function | |
def url_scrape(url):
    """Scrape *url* with Firecrawl and return the raw scrape response.

    The page is requested in markdown format. A new Firecrawl client is
    created on every call using FIRECRAWL_API_KEY.

    Any exception raised by the Firecrawl client propagates to the caller.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    # No try/except here: a handler around a bare `return` can never fire,
    # so failures from the scrape call are deliberately left to surface.
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
| # Define the structured output model for job description extraction | |
class JDE(BaseModel):
    # Structured-output schema for job description extraction; it is handed to
    # the language model through `with_structured_output`.
    # NOTE(review): documented with comments rather than a docstring because a
    # class docstring would presumably be included in the schema description
    # sent to the model -- confirm before adding one.
    Role: str = Field(
        description="Title of the job",
    )
    Company: str = Field(
        description="Name of the company",
    )
    Requirements: str = Field(
        description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.",
    )
    Industry: str = Field(
        description="Type of Industry the job belongs to",
    )
    Type: str = Field(
        description="Working style (Remote, Hybrid, Onsite)",
    )
    Location: str = Field(
        description="Location of the company",
    )
| # The core function that processes the job input and appends data to Google Sheets | |
# System message sent with every extraction request.
_JD_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
)

# Human message template. `{job_description}` is filled with the scraped page;
# the doubled braces render as literal braces in the final prompt.
_JD_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""


def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details, and log a sheet row.

    The page at ``links`` is scraped with ``url_scrape`` and passed to a
    Gemini chat model configured for structured output using the ``JDE``
    schema. One row is then appended to the module-level ``worksheet``: the
    fourteen caller-supplied values in parameter order, followed by the
    extracted ``Requirements`` and ``Industry``.

    Args:
        links: URL of the job posting to scrape.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Values written to
            the sheet unchanged.

    Returns:
        The ``JDE`` instance produced by the model.

    Raises:
        ValueError: If the model returns no structured result.
        Exception: Errors from scraping, the model call, or the sheet append
            propagate unchanged.
    """
    # Scrape the job description from the provided link.
    jd = url_scrape(links)

    query_prompt = ChatPromptTemplate.from_messages([
        ("system", _JD_SYSTEM_PROMPT),
        ("human", _JD_HUMAN_PROMPT),
    ])

    # Bind the model to the JDE schema so the chain yields a JDE instance.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    extraction_chain = query_prompt | llm.with_structured_output(JDE)

    q = extraction_chain.invoke({"job_description": jd})
    if q is None:
        # Fail with a clear message rather than an AttributeError on None below.
        raise ValueError("The language model returned no structured job details.")

    # Caller-supplied columns first, then the two extracted fields at the end.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, q.Requirements, q.Industry,
    ]
    worksheet.append_row(row)
    return q
| # Define a Pydantic model for the API input | |
class JobInput(BaseModel):
    # Request payload for create_job. Every field is a required string, and
    # the fields are declared in the same order in which fastapi_func writes
    # them to the sheet row.

    # URL of the job posting; fastapi_func scrapes this page.
    links: str

    # Values forwarded to fastapi_func and written to the sheet as given.
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str
    salary: str
    equity: str
    yoe: str
    team_size: str
    funding: str
    website: str
| # Create the FastAPI app instance | |
app = FastAPI()


# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler, so as written it is never registered on ``app``. Confirm the
# intended path and HTTP method and add the decorator.
def create_job(job: JobInput):
    """Process one job submission and return the extracted job details.

    Forwards every field of ``job`` to ``fastapi_func``, which scrapes the
    posting, runs the extraction, and appends a row to the sheet.

    Args:
        job: The validated request payload.

    Returns:
        Whatever ``fastapi_func`` returns (the extracted ``JDE`` instance).

    Raises:
        HTTPException: With status 500 and the error text as ``detail`` when
            any step fails. The original exception is chained as the cause.
    """
    try:
        return fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except Exception as e:
        # Top-level boundary: convert any failure into an HTTP 500 while
        # keeping the original exception attached for logs and debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e