Spaces:

Akarrahe
/

IQKillerv2

No application file

File size: 4,967 Bytes

0939a57

#!/usr/bin/env python3
"""
Standalone PDF Resume Text Extractor for IQKiller
Run this script to extract text from your resume PDF, then copy the output into IQKiller
"""

import sys
import os
from pathlib import Path

# PDF processing imports with error handling.
# Both libraries are optional: each *_AVAILABLE flag records whether the
# corresponding import succeeded, so extraction can skip a missing backend.
try:
    import PyPDF2
    PYPDF2_AVAILABLE = True
except ImportError:
    PYPDF2_AVAILABLE = False

try:
    import pdfplumber
    PDFPLUMBER_AVAILABLE = True
except ImportError:
    PDFPLUMBER_AVAILABLE = False

def _join_page_texts(page_texts):
    """Label each non-empty page text with its 1-based page number and join them.

    Args:
        page_texts: Iterable of per-page extraction results in page order.
            Falsy entries (``None`` or ``""``) are skipped but still counted,
            so every label matches the page's position in the document.

    Returns:
        The labelled pages joined by newlines, or ``""`` if no page had text.
    """
    return "\n".join(
        f"--- Page {page_num} ---\n{page_text}\n"
        for page_num, page_text in enumerate(page_texts, 1)
        if page_text
    )


def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from a PDF using whichever supported library is available.

    pdfplumber is tried first and PyPDF2 is used as a fallback. Progress is
    printed to stdout. Failures are never raised: they are reported through
    the returned string.

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        The extracted text with a ``--- Page N ---`` header before each page
        that yielded text, or a human-readable error message starting with
        "❌" when the path is unusable, no library is installed, or no text
        could be extracted.
    """
    if not os.path.exists(pdf_path):
        return f"❌ Error: File '{pdf_path}' not found!"

    # A directory exists but can never be opened as a PDF; report that
    # directly instead of letting every backend fail with an obscure error.
    if os.path.isdir(pdf_path):
        return f"❌ Error: '{pdf_path}' is a directory, not a PDF file!"

    if not PDFPLUMBER_AVAILABLE and not PYPDF2_AVAILABLE:
        # The whitespace-only second line is part of the original message.
        return (
            "❌ Error: No PDF processing libraries available!\n"
            "        \n"
            "Please install PDF processing libraries:\n"
            "pip install PyPDF2 pdfplumber\n"
            "\n"
            "Then run this script again."
        )

    errors = []

    # Try pdfplumber first (usually better for formatted text)
    if PDFPLUMBER_AVAILABLE:
        try:
            print("🔄 Trying pdfplumber extraction...")
            with pdfplumber.open(pdf_path) as pdf:
                # The generator must be consumed while the PDF is still open.
                text = _join_page_texts(
                    page.extract_text() for page in pdf.pages
                )
            if text:
                print(f"✅ Successfully extracted {len(text)} characters using pdfplumber")
                return text
        except Exception as e:
            # Best-effort boundary: any backend failure is recorded and the
            # next backend gets a chance.
            errors.append(f"pdfplumber failed: {e}")
            print(f"⚠️  pdfplumber failed: {e}")

    # Fallback to PyPDF2
    if PYPDF2_AVAILABLE:
        try:
            print("🔄 Trying PyPDF2 extraction...")
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                text = _join_page_texts(
                    page.extract_text() for page in reader.pages
                )
            if text:
                print(f"✅ Successfully extracted {len(text)} characters using PyPDF2")
                return text
        except Exception as e:
            errors.append(f"PyPDF2 failed: {e}")
            print(f"⚠️  PyPDF2 failed: {e}")

    if errors:
        bullet_list = "\n".join(f"• {error}" for error in errors)
        # The whitespace-only second line is part of the original message.
        return (
            "❌ Error: Could not extract text from PDF!\n"
            "        \n"
            "Errors encountered:\n"
            f"{bullet_list}\n"
            "\n"
            "Try:\n"
            "1. Ensuring the PDF is not password-protected\n"
            "2. Converting the PDF to a simpler format\n"
            "3. Copy-pasting text manually from the PDF"
        )

    # Every available backend ran without error but produced no text.
    return "❌ No text could be extracted from this PDF."

def main():
    """Run the interactive command-line PDF text extractor.

    The PDF path is taken from the first command-line argument, or prompted
    for when none is given. The extraction result is printed. When extraction
    succeeded, usage instructions follow and the user may save the text to
    ``<pdf stem>_extracted_text.txt`` in the current working directory.

    Returns:
        None. All results and errors are reported on stdout.
    """
    print("🎯 IQKiller PDF Resume Text Extractor")
    print("=" * 50)

    # Check if file path provided as argument
    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    else:
        # Interactive mode
        print("📁 Please provide the path to your resume PDF:")
        print("   Example: /Users/username/Documents/resume.pdf")
        print("   Or drag and drop your PDF file here:")
        try:
            # Dragged-and-dropped paths are often wrapped in quotes.
            pdf_path = input("\nPDF Path: ").strip().strip('"').strip("'")
        except EOFError:
            # stdin is closed (non-interactive run): same as giving no path.
            pdf_path = ""

    if not pdf_path:
        print("❌ No file path provided!")
        return

    # Extract text
    print(f"\n🔄 Processing: {pdf_path}")
    print("-" * 50)

    extracted_text = extract_text_from_pdf(pdf_path)

    print("\n" + "=" * 50)
    print("📄 EXTRACTED TEXT")
    print("=" * 50)
    print(extracted_text)
    print("=" * 50)

    # extract_text_from_pdf reports every failure as a message starting with
    # "❌", while successful output starts with a "--- Page N ---" header.
    # After a failure there is nothing to copy or save, so stop here.
    if extracted_text.startswith("❌"):
        return

    # Instructions for use
    print("\n📋 INSTRUCTIONS:")
    print("1. Copy the extracted text above")
    print("2. Open IQKiller in your browser: http://localhost:7860")
    print("3. Paste the text into the 'Resume Text' field")
    print("4. Add your job URL or description")
    print("5. Generate your personalized interview guide!")

    # Save to file option
    try:
        save_option = input("\n💾 Save extracted text to file? (y/n): ").lower().strip()
    except EOFError:
        # No interactive stdin to answer the prompt: treat as "no".
        save_option = ""

    if save_option in ('y', 'yes'):
        output_file = Path(pdf_path).stem + "_extracted_text.txt"
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(extracted_text)
        except (OSError, UnicodeError) as e:
            # Saving is best-effort; report the problem instead of crashing.
            print(f"❌ Could not save file: {e}")
        else:
            print(f"✅ Text saved to: {output_file}")

# Run the extractor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()