# Cold_Mail_RAG / app.py
# Sazzz02's picture
# Update app.py
# feb024a verified
import gradio as gr
import os
import sys
import uuid
import chromadb
import pandas as pd
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
# Get API key from Hugging Face Secrets
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# --- Initialize Vector Database on Startup ---
# This part is crucial for loading your portfolio data.
# my_portfolio.csv is expected to have "Techstack" and "Links" columns;
# each row becomes one document in a persistent Chroma collection that
# generate_cold_mail() later queries for relevant portfolio links.
try:
    df = pd.read_csv("my_portfolio.csv")
except FileNotFoundError:
    raise FileNotFoundError("my_portfolio.csv not found. Please upload it to your Space.")

client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Rebuild the collection whenever its row count no longer matches the CSV
# (covers both a brand-new empty store and a changed portfolio file).
if collection.count() != len(df):
    # Re-populate the collection if the data has changed or is empty
    if collection.count() > 0:
        # Wipe existing entries first so stale rows don't linger.
        collection.delete(ids=collection.get()['ids'])
    for _, row in df.iterrows():
        # NOTE(review): documents/metadatas are passed as scalars while ids is
        # a one-element list; this relies on chromadb casting scalars to
        # one-element lists internally — confirm with the pinned chromadb version.
        collection.add(documents=row["Techstack"],
                       metadatas={"links": row["Links"]},
                       ids=[str(uuid.uuid4())])
    print("βœ… Vector database populated with portfolio data.")
else:
    print("βœ… Vector database already exists.")
def generate_cold_mail(job_url):
    """
    Generate a cold email for the job posting at ``job_url``.

    Pipeline: scrape the page, have the LLM extract the posting as JSON,
    query the portfolio vector store for relevant links, then have the LLM
    draft the email.  Every failure path returns a "❌ Error: ..." string
    so the Gradio UI shows a friendly message instead of a raw traceback.

    Args:
        job_url: URL of a job-description page.

    Returns:
        str: The generated email text, or an error message.
    """
    if not GROQ_API_KEY:
        return "❌ Error: Groq API key is not set in Hugging Face secrets. Please add it to your Space settings."
    if not job_url:
        return "❌ Error: Please provide a job description URL."

    # --- 1. Validate Groq API Key ---
    try:
        llm = ChatGroq(
            temperature=0,
            groq_api_key=GROQ_API_KEY,
            # NOTE(review): verify this model id is still served by Groq.
            model_name="llama3-70b-8192"
        )
        # Cheap round-trip to fail fast on a bad key / unavailable model.
        llm.invoke("Test LLM connection.")
    except Exception as e:
        return f"❌ Error: Invalid Groq API key or model unavailable. Details: {e}"

    # --- 2. Scrape and Extract Job Information ---
    try:
        loader = WebBaseLoader(job_url)
        page_data = loader.load().pop().page_content
    except Exception as e:
        return f"❌ Error scraping URL. Please check the URL. Error: {e}"

    prompt_extract = PromptTemplate.from_template(
        """### SCRAPED TEXT FROM WEBSITE: {page_data}
### INSTRUCTION: Extract the job posting details and return them in JSON format with keys: `role`, `experience`, `skills` and `description`. Only return the valid JSON.
### VALID JSON (NO PREAMBLE):"""
    )
    json_parser = JsonOutputParser()
    chain_extract = prompt_extract | llm | json_parser
    try:
        job = chain_extract.invoke(input={'page_data': page_data})
    except Exception as e:
        # JsonOutputParser raises when the model's reply is not valid JSON;
        # surface it as an app-style error instead of crashing the UI.
        return f"❌ Error: Could not extract job details from the page. Details: {e}"

    # The model may return a JSON array of postings; use the first one so the
    # .get() calls below operate on a dict.
    if isinstance(job, list):
        if not job:
            return "❌ Error: Could not extract job details from the page."
        job = job[0]

    # --- 3. Find Relevant Portfolio Links ---
    job_skills = job.get('skills', [])
    if isinstance(job_skills, str):
        # chromadb expects a list of query strings, not a bare string.
        job_skills = [job_skills]
    relevant_links = []
    if job_skills:
        # An empty query_texts list would make chromadb fail, hence the guard.
        relevant_links = collection.query(query_texts=job_skills, n_results=2).get('metadatas', [])

    # --- 4. Generate Cold Email ---
    prompt_email = PromptTemplate.from_template(
        """### JOB DESCRIPTION: {job_description}
### INSTRUCTION: You are Mohan, a business development executive at AtliQ. Write a cold email to the client, describing AtliQ's capabilities in fulfilling their needs. Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
### EMAIL (NO PREAMBLE):"""
    )
    chain_email = prompt_email | llm
    email_content = chain_email.invoke({"job_description": str(job), "link_list": relevant_links})
    return email_content.content
# --- Gradio UI ---
# Build the input/output widgets up front, then wire them into the Interface.
url_box = gr.Textbox(label="Job Posting URL")
mail_box = gr.Textbox(label="Generated Cold Mail")

iface = gr.Interface(
    fn=generate_cold_mail,
    inputs=[url_box],
    outputs=mail_box,
    title="πŸ“§ AI Cold Mail Generator",
    description="Provide a job description URL to generate a tailored cold email from AtliQ.",
    theme="huggingface",
)

# Start the app (blocks until the Space is shut down).
iface.launch()