# AI Job Cover Letter Generator — Streamlit app (originally hosted as a Hugging Face Space)
import os
import streamlit as st
from groq import Groq
import pytesseract
from PIL import Image
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import tempfile
import subprocess

# Abort early with a clear message if the Tesseract OCR binary is missing —
# the job-ad image cannot be read without it.
try:
    pytesseract.get_tesseract_version()
except pytesseract.TesseractNotFoundError:
    st.error("Tesseract is not installed. Please ensure the environment has Tesseract installed.")
    st.stop()

# Initialize the Groq API client.
# SECURITY FIX: the API key was hard-coded in the source (a leaked secret).
# Read it from the environment instead so it is never committed to version
# control; fail fast with a clear message when it is not configured.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY environment variable is not set. Please configure it before running the app.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)

# Streamlit app title
st.title("AI Job Cover Letter Generator")

# Step 1: Upload Files
st.header("Upload Your Files")
cv_file = st.file_uploader("Upload your CV (PDF format)", type="pdf")
job_ad_file = st.file_uploader("Upload the Job Ad (JPG format)", type="jpg")
if cv_file and job_ad_file:
    # Persist the uploads to temp files so pdfplumber / PIL can open them by path.
    # delete=False is required because the files are reopened after the `with`
    # block closes them; they are cleaned up explicitly in the finally below
    # (the original leaked them).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(cv_file.read())
        pdf_path = tmp_pdf.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_img:
        tmp_img.write(job_ad_file.read())
        img_path = tmp_img.name

    try:
        # Step 2: Extract Content
        st.header("Extracting Content")
        cv_text = ""
        job_ad_text = ""

        # Extract text from CV (PDF).
        # BUG FIX: extract_text() returns None for pages with no extractable
        # text (e.g. scanned/image-only pages); guard with `or ""` to avoid
        # a TypeError on `None + "\n"`.
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                cv_text += (page.extract_text() or "") + "\n"

        # Extract text from Job Ad (JPG) via OCR
        job_ad_text = pytesseract.image_to_string(Image.open(img_path))

        st.subheader("Extracted CV Text")
        st.write(cv_text)
        st.subheader("Extracted Job Ad Text")
        st.write(job_ad_text)

        # Step 3: Chunking and Tokenization
        st.header("Processing Content")
        model = SentenceTransformer('all-MiniLM-L6-v2')

        def chunk_text(text, chunk_size=500):
            """Split *text* into whitespace-token chunks of at most *chunk_size* words."""
            words = text.split()
            return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

        cv_chunks = chunk_text(cv_text)
        job_ad_chunks = chunk_text(job_ad_text)

        # Generate embeddings
        st.write("Generating embeddings...")
        cv_embeddings = model.encode(cv_chunks)
        job_ad_embeddings = model.encode(job_ad_chunks)

        # Store CV embeddings in FAISS.
        # NOTE(review): the index is built but never queried below — the full
        # texts are sent to the LLM directly. Kept for parity with the original
        # flow; a retrieval step over this index would be the natural extension.
        index = faiss.IndexFlatL2(cv_embeddings.shape[1])
        index.add(cv_embeddings)

        # Step 4: Interact with the Groq API to draft the cover letter.
        st.header("Generating Cover Letter")
        query = f"Generate a cover letter based on the following CV and job ad: {cv_text}\nJob Ad: {job_ad_text}"
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": query}
            ],
            model="llama-3.3-70b-versatile",
            stream=False,
        )
        cover_letter = chat_completion.choices[0].message.content

        st.subheader("Generated Cover Letter")
        st.write(cover_letter)
    finally:
        # FIX: remove the temp files created with delete=False above; the
        # original left one pair behind on every run.
        for _path in (pdf_path, img_path):
            try:
                os.remove(_path)
            except OSError:
                pass  # best-effort cleanup; never mask the real error/result
# Instructions when no file is uploaded
else:
    st.info("Please upload both your CV and the job ad to proceed.")