Spaces:
Sleeping
Sleeping
TejaCherukuri
committed on
Commit
·
2544e0a
1
Parent(s):
77bf1df
bug fixes and added feature
Browse files
- app.py +55 -24
- src/job_extractor.py +46 -11
- src/message_writer.py +10 -5
- src/resume_loader.py +2 -2
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from src.resume_loader import ResumeLoaderFactory
|
| 3 |
from src.job_extractor import JobExtractor
|
|
|
|
| 4 |
from src.message_writer import MessageWriter
|
| 5 |
|
| 6 |
def main():
|
|
@@ -18,35 +19,61 @@ def main():
|
|
| 18 |
st.subheader("Upload Your Resume")
|
| 19 |
uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
|
| 20 |
|
| 21 |
-
# Input
|
| 22 |
-
|
| 23 |
-
"
|
| 24 |
-
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Button to trigger the flow
|
| 28 |
if st.button("Generate Message"):
|
| 29 |
-
if job_url:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
else:
|
| 47 |
st.error("Please provide a valid job URL.")
|
| 48 |
|
| 49 |
-
def generate_message_for_job(job_url, uploaded_file):
|
| 50 |
|
| 51 |
# Load the resume using the appropriate method (PDF or text)
|
| 52 |
if uploaded_file:
|
|
@@ -58,8 +85,12 @@ def generate_message_for_job(job_url, uploaded_file):
|
|
| 58 |
|
| 59 |
# Extract the key info from job URL
|
| 60 |
extractor = JobExtractor()
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Invoke chat model
|
| 65 |
writer = MessageWriter()
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from src.resume_loader import ResumeLoaderFactory
|
| 3 |
from src.job_extractor import JobExtractor
|
| 4 |
+
# from src.job_extractor_2 import JobExtractor2
|
| 5 |
from src.message_writer import MessageWriter
|
| 6 |
|
| 7 |
def main():
|
|
|
|
| 19 |
st.subheader("Upload Your Resume")
|
| 20 |
uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
|
| 21 |
|
| 22 |
+
# Job Input Option (Radio Buttons for Job URL or Description)
|
| 23 |
+
input_option = st.radio(
|
| 24 |
+
"How would you like to provide the job information?",
|
| 25 |
+
("Job URL", "Job Description")
|
| 26 |
)
|
| 27 |
|
| 28 |
+
job_url = None
|
| 29 |
+
job_description = None
|
| 30 |
+
|
| 31 |
+
# Show corresponding input field based on the selection
|
| 32 |
+
if input_option == "Job URL":
|
| 33 |
+
job_url = st.text_input(
|
| 34 |
+
"Enter the Job URL",
|
| 35 |
+
placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# Display the alert with color
|
| 39 |
+
with st.expander("🔔 **Alert!** Job URL Instructions", expanded=True):
|
| 40 |
+
st.markdown(
|
| 41 |
+
"""
|
| 42 |
+
<p style="color:red;">If using a LinkedIn job URL (Easy Apply), paste the job description instead.</p>
|
| 43 |
+
""", unsafe_allow_html=True)
|
| 44 |
+
|
| 45 |
+
elif input_option == "Job Description":
|
| 46 |
+
job_description = st.text_area("Enter the Job Description", height=200)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
# Button to trigger the flow
|
| 50 |
if st.button("Generate Message"):
|
| 51 |
+
if job_url or job_description:
|
| 52 |
+
try:
|
| 53 |
+
st.info("Processing your request...")
|
| 54 |
+
# Trigger the flow (replace with your logic)
|
| 55 |
+
thought, response = generate_message_for_job(job_url, uploaded_file, job_description)
|
| 56 |
+
|
| 57 |
+
# Create two columns for displaying outputs side by side
|
| 58 |
+
col1, col2 = st.columns(2)
|
| 59 |
+
|
| 60 |
+
# Display Thought Process in the first column
|
| 61 |
+
with col1:
|
| 62 |
+
st.subheader("DeepThink")
|
| 63 |
+
st.text_area(" ", value=thought, height=500)
|
| 64 |
+
|
| 65 |
+
# Display Generated Message in the second column
|
| 66 |
+
with col2:
|
| 67 |
+
st.subheader("Generated Message")
|
| 68 |
+
st.text_area(" ", value=response, height=500)
|
| 69 |
+
except ValueError as e:
|
| 70 |
+
st.error(f"Error: {e}")
|
| 71 |
+
except Exception as e:
|
| 72 |
+
st.error(f"Unexpected Error: {e}")
|
| 73 |
else:
|
| 74 |
st.error("Please provide a valid job URL.")
|
| 75 |
|
| 76 |
+
def generate_message_for_job(job_url, uploaded_file, job_description=None):
|
| 77 |
|
| 78 |
# Load the resume using the appropriate method (PDF or text)
|
| 79 |
if uploaded_file:
|
|
|
|
| 85 |
|
| 86 |
# Extract the key info from job URL
|
| 87 |
extractor = JobExtractor()
|
| 88 |
+
if job_url:
|
| 89 |
+
job_description = extractor.parse_job_from_web(job_url)
|
| 90 |
+
|
| 91 |
+
job = extractor.extract_jobdata(job_description)
|
| 92 |
+
if not job or not job.get('job_postings'):
|
| 93 |
+
raise ValueError(f"Cannot fetch job details from this url: {job_url}, Use the 'Job Description' field for better assistance!")
|
| 94 |
|
| 95 |
# Invoke chat model
|
| 96 |
writer = MessageWriter()
|
src/job_extractor.py
CHANGED
|
@@ -4,6 +4,9 @@ from langchain_core.prompts import PromptTemplate
|
|
| 4 |
from langchain_core.output_parsers import JsonOutputParser
|
| 5 |
from langchain_core.exceptions import OutputParserException
|
| 6 |
from src.utils import clean_text
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
class JobExtractor:
|
| 9 |
"""
|
|
@@ -46,6 +49,8 @@ class JobExtractor:
|
|
| 46 |
`role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
|
| 47 |
`preferred qualifications`, and `description`.
|
| 48 |
Only return the valid JSON.
|
|
|
|
|
|
|
| 49 |
### VALID JSON (NO PREAMBLE):
|
| 50 |
"""
|
| 51 |
)
|
|
@@ -71,15 +76,30 @@ class JobExtractor:
|
|
| 71 |
ValueError: If the content could not be loaded or cleaned properly.
|
| 72 |
"""
|
| 73 |
try:
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
| 75 |
page_data = loader.load().pop().page_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
if not page_data:
|
| 77 |
-
raise ValueError("
|
|
|
|
|
|
|
|
|
|
| 78 |
cleaned_data = clean_text(page_data)
|
| 79 |
-
print(f"Scraped and cleaned data
|
| 80 |
return cleaned_data
|
| 81 |
except Exception as e:
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
def extract_jobdata(self, text):
|
| 85 |
"""
|
|
@@ -104,14 +124,29 @@ class JobExtractor:
|
|
| 104 |
extract_chain = self.extract_prompt | self.chat_model.groq
|
| 105 |
res = extract_chain.invoke(input={"page_data": text})
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
except OutputParserException as e:
|
| 114 |
-
raise OutputParserException("Unable to parse job data as valid JSON.
|
| 115 |
except Exception as e:
|
| 116 |
raise ValueError(f"An error occurred during job extraction: {e}") from e
|
| 117 |
|
|
|
|
|
|
| 4 |
from langchain_core.output_parsers import JsonOutputParser
|
| 5 |
from langchain_core.exceptions import OutputParserException
|
| 6 |
from src.utils import clean_text
|
| 7 |
+
import json
|
| 8 |
+
import requests
|
| 9 |
+
|
| 10 |
|
| 11 |
class JobExtractor:
|
| 12 |
"""
|
|
|
|
| 49 |
`role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
|
| 50 |
`preferred qualifications`, and `description`.
|
| 51 |
Only return the valid JSON.
|
| 52 |
+
If you do not find any data to form a JSON, return
|
| 53 |
+
```json{{'job_postings': []}}```
|
| 54 |
### VALID JSON (NO PREAMBLE):
|
| 55 |
"""
|
| 56 |
)
|
|
|
|
| 76 |
ValueError: If the content could not be loaded or cleaned properly.
|
| 77 |
"""
|
| 78 |
try:
|
| 79 |
+
headers = {
|
| 80 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
| 81 |
+
}
|
| 82 |
+
loader = WebBaseLoader(url, headers)
|
| 83 |
page_data = loader.load().pop().page_content
|
| 84 |
+
|
| 85 |
+
# Check for blocking or unsupported browser messages
|
| 86 |
+
if "unsupported browser" in page_data.lower():
|
| 87 |
+
raise ValueError(f"Unsupported browser message detected.")
|
| 88 |
+
# return None
|
| 89 |
+
|
| 90 |
if not page_data:
|
| 91 |
+
raise ValueError(f"Failed to fetch content from the URL {url}.")
|
| 92 |
+
|
| 93 |
+
print(f"===Page Data===\n {page_data}")
|
| 94 |
+
|
| 95 |
cleaned_data = clean_text(page_data)
|
| 96 |
+
print(f"=== Scraped and cleaned data ===\n {cleaned_data}...") # Displaying a snippet of data for debugging
|
| 97 |
return cleaned_data
|
| 98 |
except Exception as e:
|
| 99 |
+
print(f"WebBaseLoader Error: {e}")
|
| 100 |
+
# raise ValueError(f"Failed to fetch content from the URL {url}.")
|
| 101 |
+
return None
|
| 102 |
+
|
| 103 |
|
| 104 |
def extract_jobdata(self, text):
|
| 105 |
"""
|
|
|
|
| 124 |
extract_chain = self.extract_prompt | self.chat_model.groq
|
| 125 |
res = extract_chain.invoke(input={"page_data": text})
|
| 126 |
|
| 127 |
+
print(f"=== Result Content ===\n {res.content}")
|
| 128 |
+
|
| 129 |
+
if not res.content.strip(): # Check if response is empty
|
| 130 |
+
raise ValueError("No valid job data extracted.")
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
job_data = self.json_parser.parse(res.content)
|
| 134 |
+
print(f"=== JSON Job Data ===\n {job_data}")
|
| 135 |
+
return job_data
|
| 136 |
+
except json.decoder.JSONDecodeError:
|
| 137 |
+
print("Invalid JSON received. Returning empty job data.")
|
| 138 |
+
return {"job_postings": []} # Fail gracefully
|
| 139 |
+
|
| 140 |
+
except requests.exceptions.HTTPError as http_err:
|
| 141 |
+
if http_err.response.status_code == 413:
|
| 142 |
+
raise ValueError("The input is too large. Please reduce the size and try again.")
|
| 143 |
+
elif http_err.response.status_code == 429:
|
| 144 |
+
raise ValueError("Too many requests. Please try again later.")
|
| 145 |
+
else:
|
| 146 |
+
raise ValueError(f"HTTP error occurred: {http_err}") from http_err
|
| 147 |
except OutputParserException as e:
|
| 148 |
+
raise OutputParserException("Unable to parse job data as valid JSON.") from e
|
| 149 |
except Exception as e:
|
| 150 |
raise ValueError(f"An error occurred during job extraction: {e}") from e
|
| 151 |
|
| 152 |
+
|
src/message_writer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from src.chat_model import ChatModel
|
| 2 |
from langchain_core.prompts import PromptTemplate
|
| 3 |
import re
|
|
|
|
| 4 |
|
| 5 |
class MessageWriter:
|
| 6 |
"""
|
|
@@ -106,18 +107,22 @@ class MessageWriter:
|
|
| 106 |
extracted_text = extracted_text.strip() # Strip leading/trailing whitespace and newlines
|
| 107 |
|
| 108 |
# Print the well-formatted text
|
| 109 |
-
print("
|
| 110 |
-
print(extracted_text)
|
| 111 |
think_content = extracted_text
|
| 112 |
else:
|
| 113 |
print("No content found between <think> and </think> tags.")
|
| 114 |
|
| 115 |
-
print("
|
| 116 |
-
print(cleaned_response)
|
| 117 |
|
| 118 |
# Return the extracted thought process and the cleaned email content
|
| 119 |
return think_content, cleaned_response.strip()
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
except Exception as e:
|
| 122 |
# Raise a ValueError with additional context if there was an error in processing
|
| 123 |
raise ValueError(f"An error occurred while generating the email: {e}") from e
|
|
|
|
| 1 |
from src.chat_model import ChatModel
|
| 2 |
from langchain_core.prompts import PromptTemplate
|
| 3 |
import re
|
| 4 |
+
import requests
|
| 5 |
|
| 6 |
class MessageWriter:
|
| 7 |
"""
|
|
|
|
| 107 |
extracted_text = extracted_text.strip() # Strip leading/trailing whitespace and newlines
|
| 108 |
|
| 109 |
# Print the well-formatted text
|
| 110 |
+
print(f"=== Thought Process ===\n {extracted_text}")
|
|
|
|
| 111 |
think_content = extracted_text
|
| 112 |
else:
|
| 113 |
print("No content found between <think> and </think> tags.")
|
| 114 |
|
| 115 |
+
print(f"=== Cleaned Response ===\n {cleaned_response}")
|
|
|
|
| 116 |
|
| 117 |
# Return the extracted thought process and the cleaned email content
|
| 118 |
return think_content, cleaned_response.strip()
|
| 119 |
+
except requests.exceptions.HTTPError as http_err:
|
| 120 |
+
if http_err.response.status_code == 413:
|
| 121 |
+
raise ValueError("The input is too large. Please reduce the size and try again.")
|
| 122 |
+
elif http_err.response.status_code == 429:
|
| 123 |
+
raise ValueError("Too many requests. Please try again later.")
|
| 124 |
+
else:
|
| 125 |
+
raise ValueError(f"HTTP error occurred: {http_err}") from http_err
|
| 126 |
except Exception as e:
|
| 127 |
# Raise a ValueError with additional context if there was an error in processing
|
| 128 |
raise ValueError(f"An error occurred while generating the email: {e}") from e
|
src/resume_loader.py
CHANGED
|
@@ -67,7 +67,7 @@ class TextResumeLoader(ResumeLoader):
|
|
| 67 |
|
| 68 |
text_loader = TextLoader(self.file_path)
|
| 69 |
resume = text_loader.load() # Directly load the full text without chunking
|
| 70 |
-
print(resume[0].page_content)
|
| 71 |
|
| 72 |
return resume[0]
|
| 73 |
|
|
@@ -118,7 +118,7 @@ class PdfResumeLoader(ResumeLoader):
|
|
| 118 |
pdf_loader = PyPDFLoader(temp_file_path)
|
| 119 |
resume = pdf_loader.load() # Extract text from PDF
|
| 120 |
|
| 121 |
-
print(resume[0].page_content)
|
| 122 |
return resume[0]
|
| 123 |
|
| 124 |
except Exception as e:
|
|
|
|
| 67 |
|
| 68 |
text_loader = TextLoader(self.file_path)
|
| 69 |
resume = text_loader.load() # Directly load the full text without chunking
|
| 70 |
+
print(f"=== Resume Content ===\n {resume[0].page_content}")
|
| 71 |
|
| 72 |
return resume[0]
|
| 73 |
|
|
|
|
| 118 |
pdf_loader = PyPDFLoader(temp_file_path)
|
| 119 |
resume = pdf_loader.load() # Extract text from PDF
|
| 120 |
|
| 121 |
+
print(f"=== Resume Content ===\n {resume[0].page_content}")
|
| 122 |
return resume[0]
|
| 123 |
|
| 124 |
except Exception as e:
|