Sazzz02 committed on
Commit
feb024a
·
verified ·
1 Parent(s): f26da9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -61
app.py CHANGED
@@ -2,24 +2,45 @@ import gradio as gr
2
  import os
3
  import sys
4
  import uuid
5
- import tempfile
6
  import chromadb
 
7
  from langchain_groq import ChatGroq
8
- from langchain_community.document_loaders import WebBaseLoader, UnstructuredFileLoader
9
- from langchain_text_splitters import RecursiveCharacterTextSplitter
10
  from langchain_core.prompts import PromptTemplate
 
11
 
12
  # Get API key from Hugging Face Secrets
13
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
14
 
15
- def generate_content(resume_file, job_url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
- Main function to generate the cover letter.
18
  """
19
  if not GROQ_API_KEY:
20
  return "❌ Error: Groq API key is not set in Hugging Face secrets. Please add it to your Space settings."
21
- if not resume_file:
22
- return "❌ Error: Please upload a resume."
23
  if not job_url:
24
  return "❌ Error: Please provide a job description URL."
25
 
@@ -34,75 +55,46 @@ def generate_content(resume_file, job_url):
34
  except Exception as e:
35
  return f"❌ Error: Invalid Groq API key or model unavailable. Details: {e}"
36
 
37
- # --- 2. Process Resume ---
38
- try:
39
- # Gradio's File component provides a NamedTemporaryFile
40
- loader = UnstructuredFileLoader(resume_file.name)
41
- resume_text = loader.load()[0].page_content
42
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
43
- resume_chunks = text_splitter.split_text(resume_text)
44
- except Exception as e:
45
- return f"❌ Error processing the resume file. Ensure it's a valid PDF. Error: {e}"
46
-
47
- # --- 3. Set up the Resume Vector Database ---
48
- client = chromadb.PersistentClient('resume_vectorstore')
49
- collection = client.get_or_create_collection(name="resume_content")
50
-
51
- # Clear old data before adding new
52
- if collection.count() > 0:
53
- collection.delete(ids=collection.get()['ids'])
54
-
55
- ids = [str(uuid.uuid4()) for _ in range(len(resume_chunks))]
56
- collection.add(documents=resume_chunks, ids=ids)
57
-
58
- # --- 4. Web Scraping and JD Extraction ---
59
  try:
60
  loader = WebBaseLoader(job_url)
61
- jd_text = loader.load().pop().page_content
62
  except Exception as e:
63
- return f"❌ Error scraping the URL. Please check the URL. Error: {e}"
64
-
65
  prompt_extract = PromptTemplate.from_template(
66
  """### SCRAPED TEXT FROM WEBSITE: {page_data}
67
- ### INSTRUCTION: Extract key skills, technologies, and responsibilities.
68
- Return them as a list of strings. ### OUTPUT:"""
69
  )
70
- chain_extract = prompt_extract | llm
71
- jd_requirements = chain_extract.invoke(input={'page_data': jd_text}).content.split('\n')
72
-
73
- # --- 5. Find Relevant Resume Content ---
74
- relevant_resume_chunks = collection.query(
75
- query_texts=jd_requirements,
76
- n_results=5
77
- ).get('documents', [])
78
 
79
- # --- 6. Generate Cover Letter/Resume Content ---
80
- prompt_content = PromptTemplate.from_template(
81
- """### JOB REQUIREMENTS: {jd_requirements}
82
- ### YOUR RESUME CONTENT: {resume_content}
83
- ### INSTRUCTION: You are a career consultant. Write a professional and compelling cover letter.
84
- ### COVER LETTER:"""
85
  )
86
- chain_content = prompt_content | llm
87
- generated_content = chain_content.invoke(
88
- input={
89
- 'jd_requirements': "\n".join(jd_requirements),
90
- 'resume_content': "\n".join([item for sublist in relevant_resume_chunks for item in sublist])
91
- }
92
- ).content
93
 
94
- return generated_content
95
 
96
  # --- Gradio UI ---
97
  iface = gr.Interface(
98
- fn=generate_content,
99
  inputs=[
100
- gr.File(label="Upload your resume (PDF)"),
101
  gr.Textbox(label="Job Posting URL"),
102
  ],
103
- outputs=gr.Textbox(label="Generated Cover Letter"),
104
- title="AI Resume Matcher and Content Generator",
105
- description="Upload your resume and a job description to get a personalized cover letter.",
106
  theme="huggingface"
107
  )
108
 
 
2
  import os
3
  import sys
4
  import uuid
 
5
  import chromadb
6
+ import pandas as pd
7
  from langchain_groq import ChatGroq
8
+ from langchain_community.document_loaders import WebBaseLoader
 
9
  from langchain_core.prompts import PromptTemplate
10
+ from langchain_core.output_parsers import JsonOutputParser
11
 
12
  # Get API key from Hugging Face Secrets
13
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
14
 
15
+ # --- Initialize Vector Database on Startup ---
16
+ # This part is crucial for loading your portfolio data
17
+ try:
18
+ df = pd.read_csv("my_portfolio.csv")
19
+ except FileNotFoundError:
20
+ raise FileNotFoundError("my_portfolio.csv not found. Please upload it to your Space.")
21
+
22
+ client = chromadb.PersistentClient('vectorstore')
23
+ collection = client.get_or_create_collection(name="portfolio")
24
+
25
+ if collection.count() != len(df):
26
+ # Re-populate the collection if the data has changed or is empty
27
+ if collection.count() > 0:
28
+ collection.delete(ids=collection.get()['ids'])
29
+
30
+ for _, row in df.iterrows():
31
+ collection.add(documents=row["Techstack"],
32
+ metadatas={"links": row["Links"]},
33
+ ids=[str(uuid.uuid4())])
34
+ print("βœ… Vector database populated with portfolio data.")
35
+ else:
36
+ print("βœ… Vector database already exists.")
37
+
38
+ def generate_cold_mail(job_url):
39
  """
40
+ Main function to generate the cold mail content.
41
  """
42
  if not GROQ_API_KEY:
43
  return "❌ Error: Groq API key is not set in Hugging Face secrets. Please add it to your Space settings."
 
 
44
  if not job_url:
45
  return "❌ Error: Please provide a job description URL."
46
 
 
55
  except Exception as e:
56
  return f"❌ Error: Invalid Groq API key or model unavailable. Details: {e}"
57
 
58
+ # --- 2. Scrape and Extract Job Information ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  try:
60
  loader = WebBaseLoader(job_url)
61
+ page_data = loader.load().pop().page_content
62
  except Exception as e:
63
+ return f"❌ Error scraping URL. Please check the URL. Error: {e}"
64
+
65
  prompt_extract = PromptTemplate.from_template(
66
  """### SCRAPED TEXT FROM WEBSITE: {page_data}
67
+ ### INSTRUCTION: Extract the job posting details and return them in JSON format with keys: `role`, `experience`, `skills` and `description`. Only return the valid JSON.
68
+ ### VALID JSON (NO PREAMBLE):"""
69
  )
70
+ json_parser = JsonOutputParser()
71
+ chain_extract = prompt_extract | llm | json_parser
72
+ job = chain_extract.invoke(input={'page_data': page_data})
73
+
74
+ # --- 3. Find Relevant Portfolio Links ---
75
+ job_skills = job.get('skills', [])
76
+ relevant_links = collection.query(query_texts=job_skills, n_results=2).get('metadatas', [])
 
77
 
78
+ # --- 4. Generate Cold Email ---
79
+ prompt_email = PromptTemplate.from_template(
80
+ """### JOB DESCRIPTION: {job_description}
81
+ ### INSTRUCTION: You are Mohan, a business development executive at AtliQ. Write a cold email to the client, describing AtliQ's capabilities in fulfilling their needs. Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
82
+ ### EMAIL (NO PREAMBLE):"""
 
83
  )
84
+ chain_email = prompt_email | llm
85
+ email_content = chain_email.invoke({"job_description": str(job), "link_list": relevant_links})
 
 
 
 
 
86
 
87
+ return email_content.content
88
 
89
  # --- Gradio UI ---
90
  iface = gr.Interface(
91
+ fn=generate_cold_mail,
92
  inputs=[
 
93
  gr.Textbox(label="Job Posting URL"),
94
  ],
95
+ outputs=gr.Textbox(label="Generated Cold Mail"),
96
+ title="πŸ“§ AI Cold Mail Generator",
97
+ description="Provide a job description URL to generate a tailored cold email from AtliQ.",
98
  theme="huggingface"
99
  )
100