# app.py -- commit b8c3d32 ("Update app.py", verified), author: Ahsan-Shah
import os
import streamlit as st
from groq import Groq
import pytesseract
from PIL import Image
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import tempfile
import subprocess
# Verify up front that pytesseract can reach the Tesseract engine; the app
# halts with an on-page error otherwise.
def _tesseract_missing():
    """Return True when pytesseract raises TesseractNotFoundError."""
    try:
        pytesseract.get_tesseract_version()
    except pytesseract.TesseractNotFoundError:
        return True
    return False


if _tesseract_missing():
    st.error("Tesseract is not installed. Please ensure the environment has Tesseract installed.")
    st.stop()
# Initialize the GROQ API client.
# The key is read from the environment (e.g. a deployment secret) so that no
# credential lives in the source. Any key that was previously committed in
# this file must be treated as leaked and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please provide it as an environment variable or secret.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
# Page title
st.title("AI Job Cover Letter Generator")

# Step 1: collect the two input documents (CV as PDF, job ad as JPG image)
st.header("Upload Your Files")
cv_file = st.file_uploader(label="Upload your CV (PDF format)", type="pdf")
job_ad_file = st.file_uploader(label="Upload the Job Ad (JPG format)", type="jpg")
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* words.

    Words are whatever ``str.split()`` yields (whitespace-separated tokens).
    Returns a list of strings; empty or whitespace-only text gives ``[]``.
    """
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


def _remove_quietly(path):
    """Delete the temporary file at *path*; cleanup is best-effort."""
    try:
        os.remove(path)
    except OSError:
        # The file is already gone or cannot be removed; nothing useful
        # can be done about a leftover temp file from inside the app.
        pass


if cv_file and job_ad_file:
    pdf_path = None
    img_path = None
    cv_text = ""
    job_ad_text = ""
    try:
        # Persist the uploads so pdfplumber and PIL can open them by path.
        # getvalue() returns the full content regardless of the current
        # read position of the uploaded buffer.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
            tmp_pdf.write(cv_file.getvalue())
            pdf_path = tmp_pdf.name
        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_img:
            tmp_img.write(job_ad_file.getvalue())
            img_path = tmp_img.name

        # Step 2: Extract Content
        st.header("Extracting Content")

        # Extract text from CV (PDF). extract_text() may return None for a
        # page without a text layer, so fall back to an empty string.
        with pdfplumber.open(pdf_path) as pdf:
            cv_text = "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

        # Extract text from Job Ad (JPG) via OCR; close the image handle
        # before the temp file is deleted.
        with Image.open(img_path) as job_ad_image:
            job_ad_text = pytesseract.image_to_string(job_ad_image)
    finally:
        # The temp files were created with delete=False, so remove them
        # explicitly once extraction has finished (or failed).
        for tmp_path in (pdf_path, img_path):
            if tmp_path:
                _remove_quietly(tmp_path)

    st.subheader("Extracted CV Text")
    st.write(cv_text)
    st.subheader("Extracted Job Ad Text")
    st.write(job_ad_text)

    # Step 3: Chunking and Tokenization
    cv_chunks = chunk_text(cv_text)
    job_ad_chunks = chunk_text(job_ad_text)

    # Without any extracted text the embedding step below has nothing to
    # index (an empty result has no second dimension) and the prompt would
    # be meaningless, so stop with a clear message instead of crashing.
    if not cv_chunks:
        st.error("No text could be extracted from the CV. Please upload a PDF that contains selectable text.")
        st.stop()
    if not job_ad_chunks:
        st.error("No text could be recognised in the job ad image. Please upload a clearer image.")
        st.stop()

    st.header("Processing Content")
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Generate embeddings
    st.write("Generating embeddings...")
    cv_embeddings = model.encode(cv_chunks)
    job_ad_embeddings = model.encode(job_ad_chunks)

    # Store in FAISS.
    # NOTE(review): the index and job_ad_embeddings are built but not queried
    # anywhere below; the prompt uses the full extracted text.
    index = faiss.IndexFlatL2(cv_embeddings.shape[1])
    index.add(cv_embeddings)

    # Step 4: Interact with GROQ API
    st.header("Generating Cover Letter")
    query = f"Generate a cover letter based on the following CV and job ad: {cv_text}\nJob Ad: {job_ad_text}"
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": query}
        ],
        model="llama-3.3-70b-versatile",
        stream=False,
    )
    cover_letter = chat_completion.choices[0].message.content
    st.subheader("Generated Cover Letter")
    st.write(cover_letter)
# Instructions when no file is uploaded
else:
    st.info("Please upload both your CV and the job ad to proceed.")