File size: 2,993 Bytes
0f72255
 
 
 
 
 
 
 
 
b8c3d32
 
 
 
 
 
 
 
0f72255
 
ba1903a
 
0f72255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba1903a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import streamlit as st
from groq import Groq
import pytesseract
from PIL import Image
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import tempfile
import subprocess

# Fail fast if the Tesseract binary is missing: pytesseract only wraps the
# external executable, so OCR further down would error out at runtime.
def _tesseract_available():
    """Return True if pytesseract can reach the Tesseract executable."""
    try:
        pytesseract.get_tesseract_version()
    except pytesseract.TesseractNotFoundError:
        return False
    return True

if not _tesseract_available():
    st.error("Tesseract is not installed. Please ensure the environment has Tesseract installed.")
    st.stop()

# Initialize the Groq API client.
# SECURITY: never hard-code API keys in source — a committed key is
# effectively public and must be rotated. Read it from the environment.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY environment variable is not set. Please configure it before running the app.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)

# Streamlit app title
st.title("AI Job Cover Letter Generator")

# Step 1: Upload Files — two uploaders whose results gate the pipeline below.
# Note the intentionally asymmetric formats: the CV is a text PDF (parsed
# with pdfplumber), the job ad is a JPG image (read via Tesseract OCR).
st.header("Upload Your Files")
cv_file = st.file_uploader("Upload your CV (PDF format)", type="pdf")
job_ad_file = st.file_uploader("Upload the Job Ad (JPG format)", type="jpg")

if cv_file and job_ad_file:
    # Persist the uploads to temporary files: pdfplumber and pytesseract
    # work most reliably with real filesystem paths.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(cv_file.read())
        pdf_path = tmp_pdf.name

    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_img:
        tmp_img.write(job_ad_file.read())
        img_path = tmp_img.name

    try:
        # Step 2: Extract Content
        st.header("Extracting Content")
        cv_text = ""

        # Extract text from CV (PDF). extract_text() returns None for pages
        # with no text layer (e.g. scanned pages) — coalesce to "" so the
        # concatenation cannot raise TypeError.
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                cv_text += (page.extract_text() or "") + "\n"

        # Extract text from Job Ad (JPG) via OCR; close the image handle
        # promptly instead of leaking it.
        with Image.open(img_path) as job_ad_image:
            job_ad_text = pytesseract.image_to_string(job_ad_image)
    finally:
        # delete=False means nothing else will remove the temp files —
        # clean them up here even if extraction fails.
        for tmp_path in (pdf_path, img_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # best-effort cleanup; a stale temp file is harmless

    st.subheader("Extracted CV Text")
    st.write(cv_text)

    st.subheader("Extracted Job Ad Text")
    st.write(job_ad_text)

    # Step 3: Chunking and Tokenization
    st.header("Processing Content")
    model = SentenceTransformer('all-MiniLM-L6-v2')

    def chunk_text(text, chunk_size=500):
        """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
        words = text.split()
        return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    cv_chunks = chunk_text(cv_text)
    job_ad_chunks = chunk_text(job_ad_text)

    # Generate embeddings
    st.write("Generating embeddings...")
    cv_embeddings = model.encode(cv_chunks)
    job_ad_embeddings = model.encode(job_ad_chunks)

    # Store CV embeddings in FAISS. Guard against an empty extraction:
    # encode([]) yields an array with no second dimension, so shape[1]
    # would raise IndexError.
    # NOTE(review): the index is built but never queried below — presumably
    # a placeholder for a future retrieval step; confirm it is still needed.
    if len(cv_chunks) > 0:
        index = faiss.IndexFlatL2(cv_embeddings.shape[1])
        index.add(cv_embeddings)

    # Step 4: Interact with GROQ API — the full extracted texts are sent
    # in a single prompt (the FAISS index above is not consulted).
    st.header("Generating Cover Letter")
    query = f"Generate a cover letter based on the following CV and job ad: {cv_text}\nJob Ad: {job_ad_text}"

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": query}
        ],
        model="llama-3.3-70b-versatile",
        stream=False,
    )

    cover_letter = chat_completion.choices[0].message.content
    st.subheader("Generated Cover Letter")
    st.write(cover_letter)

# Instructions when no file is uploaded
else:
    st.info("Please upload both your CV and the job ad to proceed.")