ak0601 committed on
Commit
71d720b
·
verified ·
1 Parent(s): 763d20d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -164
app.py CHANGED
@@ -1,164 +1,164 @@
1
- from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel, Field
3
- from langchain_google_genai import ChatGoogleGenerativeAI
4
- from langchain_core.prompts import ChatPromptTemplate
5
- import json
6
- from firecrawl import FirecrawlApp
7
- import gspread
8
- import os
9
- from dotenv import load_dotenv
10
- import json
11
-
12
- load_dotenv()
13
-
14
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
15
- FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
16
- SHEET_ID = os.getenv("SHEET_ID")
17
- dic = os.getenv("genai")
18
- if dic:
19
- try:
20
- dic1 = json.loads(dic)
21
- print(dic1)
22
- except json.JSONDecodeError:
23
- print("Error: 'genai' environment variable is not valid JSON")
24
- else:
25
- print("Warning: 'genai' environment variable is not set")
26
- # Setup Google Sheets connection (update the path and sheet name)
27
- genai ={
28
- "type": os.getenv("type"),
29
- "project_id": os.getenv("project_id"),
30
- "private_key_id": os.getenv("private_key_id"),
31
- "private_key": os.getenv("private_key"),
32
- "client_email": os.getenv("client_email"),
33
- "client_id": os.getenv("client_id"),
34
- "auth_uri": os.getenv("auth_uri"),
35
- "token_uri": os.getenv("token_uri"),
36
- "auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url"),
37
- "client_x509_cert_url": os.getenv("client_x509_cert_url"),
38
- "universe_domain": os.getenv("universe_domain")
39
- }
40
- gc = gspread.service_account_from_dict(dic1)
41
- sh = gc.open_by_key(SHEET_ID) # Replace with your Google Sheet name
42
- worksheet = sh.worksheet("S1") # Replace with your worksheet name if different
43
-
44
- # Define your URL scraping function
45
- def url_scrape(url):
46
- app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
47
- response = app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
48
- try:
49
- return response
50
- except Exception:
51
- return response
52
-
53
- # Define the structured output model for job description extraction
54
- class JDE(BaseModel):
55
- Role: str = Field(description="Title of the job")
56
- Company: str = Field(description="Name of the company")
57
- Requirements: str = Field(description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.")
58
- Industry: str = Field(description="Type of Industry the job belongs to")
59
- Type: str = Field(description="Working style (Remote, Hybrid, Onsite)")
60
- Location: str = Field(description="Location of the company")
61
-
62
- # The core function that processes the job input and appends data to Google Sheets
63
- def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
64
- # Scrape the job description from the provided link
65
- jd = url_scrape(links)
66
-
67
- # Create the prompt for the language model
68
- system = (
69
- "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
70
- )
71
- prompt_text = """
72
- You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
73
- Try to return your answer in JSON format based on the following structure:
74
- {{
75
- "Role": "Title of the job",
76
- "Company": "Name of the company the job is about",
77
- "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
78
- "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
79
- "Type": "Working style (Remote, Hybrid, Onsite)",
80
- "Location": "Location of the company"
81
- }}
82
- Job Description: {jd}
83
- """
84
-
85
- query_prompt = ChatPromptTemplate.from_messages([
86
- ("system", system),
87
- ("human", """
88
- You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
89
- Try to return your answer in JSON format based on the following structure:
90
- {{
91
- "Role": "Title of the job",
92
- "Company": "Name of the company the job is about",
93
- "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
94
- "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
95
- "Type": "Working style (Remote, Hybrid, Onsite)",
96
- "Location": "Location of the company"
97
- }}
98
- Job Description: {job_description}
99
- """)
100
- ])
101
-
102
- # Initialize the language model and set it up for structured output using the JDE model
103
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
104
- str_llm = llm.with_structured_output(JDE)
105
- JDE_re = query_prompt | str_llm
106
- # Invoke the language model to extract structured job details
107
- q = JDE_re.invoke({"job_description": jd})
108
-
109
- # Extract additional fields
110
- req = q.Requirements
111
- indus = q.Industry
112
-
113
- # Prepare the row with all the data (append the two extra fields at the end)
114
- row = [
115
- links, company, role, one_liner, reward, locations,
116
- tech_stack, workplace, salary, equity, yoe, team_size,
117
- funding, website, req, indus
118
- ]
119
- worksheet.append_row(row)
120
-
121
- return q
122
-
123
- # Define a Pydantic model for the API input
124
- class JobInput(BaseModel):
125
- links: str
126
- company: str
127
- role: str
128
- one_liner: str
129
- reward: str
130
- locations: str
131
- tech_stack: str
132
- workplace: str
133
- salary: str
134
- equity: str
135
- yoe: str
136
- team_size: str
137
- funding: str
138
- website: str
139
-
140
- # Create the FastAPI app instance
141
- app = FastAPI()
142
-
143
- @app.post("/create-job")
144
- def create_job(job: JobInput):
145
- try:
146
- result = fastapi_func(
147
- links=job.links,
148
- company=job.company,
149
- role=job.role,
150
- one_liner=job.one_liner,
151
- reward=job.reward,
152
- locations=job.locations,
153
- tech_stack=job.tech_stack,
154
- workplace=job.workplace,
155
- salary=job.salary,
156
- equity=job.equity,
157
- yoe=job.yoe,
158
- team_size=job.team_size,
159
- funding=job.funding,
160
- website=job.website
161
- )
162
- return result
163
- except Exception as e:
164
- raise HTTPException(status_code=500, detail=str(e))
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel, Field
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from langchain_core.prompts import ChatPromptTemplate
5
+ import json
6
+ from firecrawl import FirecrawlApp
7
+ import gspread
8
+ import os
9
+ from dotenv import load_dotenv
10
+ import json
11
+
load_dotenv()

# API keys and the target spreadsheet ID come from the environment
# (load_dotenv above also picks them up from a local .env file).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET_ID")

# Optional JSON blob from the "genai" environment variable. It is parsed only
# to report whether it is present and well-formed; the Sheets client below is
# built from the individual variables instead.
dic = os.getenv("genai")
dic1 = None  # remains None when the variable is unset or not valid JSON
if dic:
    try:
        dic1 = json.loads(dic)
        # The parsed value is intentionally NOT printed: it has been used as
        # service-account credentials, and echoing it would put secrets in
        # the process logs.
    except json.JSONDecodeError:
        print("Error: 'genai' environment variable is not valid JSON")
else:
    print("Warning: 'genai' environment variable is not set")

# Service-account info assembled field by field from environment variables.
genai = {
    "type": os.getenv("type"),
    "project_id": os.getenv("project_id"),
    "private_key_id": os.getenv("private_key_id"),
    "private_key": os.getenv("private_key"),
    "client_email": os.getenv("client_email"),
    "client_id": os.getenv("client_id"),
    "auth_uri": os.getenv("auth_uri"),
    "token_uri": os.getenv("token_uri"),
    "auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url"),
    "client_x509_cert_url": os.getenv("client_x509_cert_url"),
    "universe_domain": os.getenv("universe_domain"),
}

# Google Sheets connection: authenticate, open the spreadsheet by its key
# (SHEET_ID), and select the worksheet named "S1" that rows are appended to.
gc = gspread.service_account_from_dict(genai)
sh = gc.open_by_key(SHEET_ID)
worksheet = sh.worksheet("S1")

def url_scrape(url):
    """Scrape *url* with Firecrawl and return the client's response.

    The page is requested in markdown format. The response object is returned
    exactly as the Firecrawl client produced it.

    Any exception raised by the Firecrawl client propagates to the caller;
    the previous ``try``/``except`` only wrapped a bare ``return`` and could
    never catch anything, so it has been removed.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})

# Structured-output schema for job description extraction. The language model
# in fastapi_func is bound to this model via with_structured_output, so each
# field description below doubles as guidance for what to extract.
class JDE(BaseModel):
    # What the job is and who is offering it.
    Role: str = Field(description="Title of the job")
    Company: str = Field(description="Name of the company")

    # Free-text overview of the skills / tech stack the posting asks for.
    Requirements: str = Field(
        description=(
            "Requirements of the job. Provide a detailed overview of "
            "the ideal skills or tech stack required."
        )
    )

    # Classification and logistics.
    Industry: str = Field(description="Type of Industry the job belongs to")
    Type: str = Field(description="Working style (Remote, Hybrid, Onsite)")
    Location: str = Field(description="Location of the company")

# System message for the extraction prompt.
_JD_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
)

# Human message template. Doubled braces are literal braces for the template
# engine; {job_description} is the only substitution variable.
_JD_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""


def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details, and log a sheet row.

    Steps:
      1. Scrape the page at ``links`` with ``url_scrape``.
      2. Ask the Gemini model, bound to the ``JDE`` schema, to extract
         structured job details from the scraped content.
      3. Append one row to the module-level ``worksheet``: the fourteen
         caller-supplied values in parameter order, followed by the extracted
         ``Requirements`` and ``Industry``.

    Args:
        links: URL of the job posting to scrape; also stored as the first
            column of the row.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Caller-supplied
            values written to the sheet unchanged.

    Returns:
        The ``JDE`` instance produced by the language model.

    Raises:
        Whatever the scraper, the language-model chain, or the Sheets client
        raises; nothing is caught here.
    """
    # Scrape the job description from the provided link.
    jd = url_scrape(links)

    # The unused, never-formatted ``prompt_text`` duplicate of the human
    # prompt has been removed; only this template was ever sent to the model.
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", _JD_SYSTEM_PROMPT),
        ("human", _JD_HUMAN_PROMPT),
    ])

    # Model configured for structured output using the JDE schema.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    extraction_chain = query_prompt | llm.with_structured_output(JDE)
    q = extraction_chain.invoke({"job_description": jd})

    # Caller-supplied columns first, then the two extracted fields at the end.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, q.Requirements, q.Industry,
    ]
    worksheet.append_row(row)

    return q

# Request body for POST /create-job. Every field is a required string and is
# forwarded unchanged to fastapi_func by the route handler.
class JobInput(BaseModel):
    # URL of the job posting (this is what gets scraped).
    links: str

    # Basic description of the opening.
    company: str
    role: str
    one_liner: str
    reward: str

    # Where and how the work happens.
    locations: str
    tech_stack: str
    workplace: str

    # Compensation and experience.
    salary: str
    equity: str
    yoe: str

    # Company background.
    team_size: str
    funding: str
    website: str

# Create the FastAPI app instance
app = FastAPI()


# POST /create-job: validates the body as JobInput, runs the scrape/extract/
# append pipeline in fastapi_func, and returns the extracted job details.
# Any failure in the pipeline is reported to the client as HTTP 500 with the
# exception text as the detail.
@app.post("/create-job")
def create_job(job: JobInput):
    try:
        return fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except Exception as e:
        # Chain the original exception so server-side tracebacks keep the
        # real cause instead of only the HTTPException wrapper.
        raise HTTPException(status_code=500, detail=str(e)) from e