Spaces:

Sazzz02
/

Cold_Mail_RAG

Sleeping

App Files Files Community

Sazzz02 committed on Aug 8, 2025

Commit

feb024a

verified ·

1 Parent(s): f26da9a

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -61

app.py CHANGED Viewed

@@ -2,24 +2,45 @@ import gradio as gr
 import os
 import sys
 import uuid
-import tempfile
 import chromadb
 from langchain_groq import ChatGroq
-from langchain_community.document_loaders import WebBaseLoader, UnstructuredFileLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.prompts import PromptTemplate
 # Get API key from Hugging Face Secrets
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
-def generate_content(resume_file, job_url):
     """
-    Main function to generate the cover letter.
     """
     if not GROQ_API_KEY:
         return "❌ Error: Groq API key is not set in Hugging Face secrets. Please add it to your Space settings."
-    if not resume_file:
-        return "❌ Error: Please upload a resume."
     if not job_url:
         return "❌ Error: Please provide a job description URL."
@@ -34,75 +55,46 @@ def generate_content(resume_file, job_url):
     except Exception as e:
         return f"❌ Error: Invalid Groq API key or model unavailable. Details: {e}"
-    # --- 2. Process Resume ---
-    try:
-        # Gradio's File component provides a NamedTemporaryFile
-        loader = UnstructuredFileLoader(resume_file.name)
-        resume_text = loader.load()[0].page_content
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-        resume_chunks = text_splitter.split_text(resume_text)
-    except Exception as e:
-        return f"❌ Error processing the resume file. Ensure it's a valid PDF. Error: {e}"
-    # --- 3. Set up the Resume Vector Database ---
-    client = chromadb.PersistentClient('resume_vectorstore')
-    collection = client.get_or_create_collection(name="resume_content")
-    # Clear old data before adding new
-    if collection.count() > 0:
-        collection.delete(ids=collection.get()['ids'])
-    ids = [str(uuid.uuid4()) for _ in range(len(resume_chunks))]
-    collection.add(documents=resume_chunks, ids=ids)
-    # --- 4. Web Scraping and JD Extraction ---
     try:
         loader = WebBaseLoader(job_url)
-        jd_text = loader.load().pop().page_content
     except Exception as e:
-        return f"❌ Error scraping the URL. Please check the URL. Error: {e}"
     prompt_extract = PromptTemplate.from_template(
         """### SCRAPED TEXT FROM WEBSITE: {page_data}
-        ### INSTRUCTION: Extract key skills, technologies, and responsibilities.
-        Return them as a list of strings. ### OUTPUT:"""
     )
-    chain_extract = prompt_extract | llm
-    jd_requirements = chain_extract.invoke(input={'page_data': jd_text}).content.split('\n')
-    # --- 5. Find Relevant Resume Content ---
-    relevant_resume_chunks = collection.query(
-        query_texts=jd_requirements,
-        n_results=5
-    ).get('documents', [])
-    # --- 6. Generate Cover Letter/Resume Content ---
-    prompt_content = PromptTemplate.from_template(
-        """### JOB REQUIREMENTS: {jd_requirements}
-        ### YOUR RESUME CONTENT: {resume_content}
-        ### INSTRUCTION: You are a career consultant. Write a professional and compelling cover letter.
-        ### COVER LETTER:"""
     )
-    chain_content = prompt_content | llm
-    generated_content = chain_content.invoke(
-        input={
-            'jd_requirements': "\n".join(jd_requirements),
-            'resume_content': "\n".join([item for sublist in relevant_resume_chunks for item in sublist])
-        }
-    ).content
-    return generated_content
 # --- Gradio UI ---
 iface = gr.Interface(
-    fn=generate_content,
     inputs=[
-        gr.File(label="Upload your resume (PDF)"),
         gr.Textbox(label="Job Posting URL"),
     ],
-    outputs=gr.Textbox(label="Generated Cover Letter"),
-    title="AI Resume Matcher and Content Generator",
-    description="Upload your resume and a job description to get a personalized cover letter.",
     theme="huggingface"
 )

 import os
 import sys
 import uuid
 import chromadb
+import pandas as pd
 from langchain_groq import ChatGroq
+from langchain_community.document_loaders import WebBaseLoader
 from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
 # Get API key from Hugging Face Secrets
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+# --- Initialize Vector Database on Startup ---
+# Runs once at import time: load my_portfolio.csv and sync its rows into the persistent Chroma "portfolio" collection.
+try:
+    df = pd.read_csv("my_portfolio.csv")
+except FileNotFoundError:
+    raise FileNotFoundError("my_portfolio.csv not found. Please upload it to your Space.")
+client = chromadb.PersistentClient('vectorstore')
+collection = client.get_or_create_collection(name="portfolio")
+if collection.count() != len(df):
+    # Re-populate the collection if the data has changed or is empty
+    if collection.count() > 0:
+        collection.delete(ids=collection.get()['ids'])
+    for _, row in df.iterrows():
+        collection.add(documents=row["Techstack"],
+                       metadatas={"links": row["Links"]},
+                       ids=[str(uuid.uuid4())])
+    print("✅ Vector database populated with portfolio data.")
+else:
+    print("✅ Vector database already exists.")
+def generate_cold_mail(job_url):
     """
+    Main function to generate the cold mail content.
     """
     if not GROQ_API_KEY:
         return "❌ Error: Groq API key is not set in Hugging Face secrets. Please add it to your Space settings."
     if not job_url:
         return "❌ Error: Please provide a job description URL."
     except Exception as e:
         return f"❌ Error: Invalid Groq API key or model unavailable. Details: {e}"
+    # --- 2. Scrape and Extract Job Information ---
     try:
         loader = WebBaseLoader(job_url)
+        page_data = loader.load().pop().page_content
     except Exception as e:
+        return f"❌ Error scraping URL. Please check the URL. Error: {e}"
     prompt_extract = PromptTemplate.from_template(
         """### SCRAPED TEXT FROM WEBSITE: {page_data}
+        ### INSTRUCTION: Extract the job posting details and return them in JSON format with keys: `role`, `experience`, `skills` and `description`. Only return the valid JSON.
+        ### VALID JSON (NO PREAMBLE):"""
     )
+    json_parser = JsonOutputParser()
+    chain_extract = prompt_extract | llm | json_parser
+    job = chain_extract.invoke(input={'page_data': page_data})
+    # --- 3. Find Relevant Portfolio Links ---
+    job_skills = job.get('skills', [])
+    relevant_links = collection.query(query_texts=job_skills, n_results=2).get('metadatas', [])
+    # --- 4. Generate Cold Email ---
+    prompt_email = PromptTemplate.from_template(
+        """### JOB DESCRIPTION: {job_description}
+        ### INSTRUCTION: You are Mohan, a business development executive at AtliQ. Write a cold email to the client, describing AtliQ's capabilities in fulfilling their needs. Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
+        ### EMAIL (NO PREAMBLE):"""
     )
+    chain_email = prompt_email | llm
+    email_content = chain_email.invoke({"job_description": str(job), "link_list": relevant_links})
+    return email_content.content
 # --- Gradio UI ---
 iface = gr.Interface(
+    fn=generate_cold_mail,
     inputs=[
         gr.Textbox(label="Job Posting URL"),
     ],
+    outputs=gr.Textbox(label="Generated Cold Mail"),
+    title="📧 AI Cold Mail Generator",
+    description="Provide a job description URL to generate a tailored cold email from AtliQ.",
     theme="huggingface"
 )