File size: 4,967 Bytes
0939a57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Standalone PDF Resume Text Extractor for IQKiller
Run this script to extract text from your resume PDF, then copy the output into IQKiller
"""

import sys
import os
from pathlib import Path

# PDF processing imports with error handling
try:
    import PyPDF2
    PYPDF2_AVAILABLE = True
except ImportError:
    PYPDF2_AVAILABLE = False

try:
    import pdfplumber
    PDFPLUMBER_AVAILABLE = True
except ImportError:
    PDFPLUMBER_AVAILABLE = False

def _join_page_texts(pages) -> str:
    """Return the text of every non-empty page, each under a page header.

    ``pages`` is any iterable of objects exposing ``extract_text()``; the
    page objects of both backends used below are consumed this way. Pages
    whose ``extract_text()`` result is falsy are skipped. Returns an empty
    string when no page yields text.
    """
    text_parts = []
    for page_num, page in enumerate(pages, 1):
        page_text = page.extract_text()
        if page_text:
            text_parts.append(f"--- Page {page_num} ---\n{page_text}\n")
    return "\n".join(text_parts)


def _extract_with_pdfplumber(pdf_path: str) -> str:
    """Extract page text from ``pdf_path`` using pdfplumber."""
    with pdfplumber.open(pdf_path) as pdf:
        return _join_page_texts(pdf.pages)


def _extract_with_pypdf2(pdf_path: str) -> str:
    """Extract page text from ``pdf_path`` using PyPDF2."""
    with open(pdf_path, 'rb') as file:
        return _join_page_texts(PyPDF2.PdfReader(file).pages)


def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from a PDF using whichever supported library is installed.

    pdfplumber is tried first, then PyPDF2. The first backend that yields
    any text wins; a backend that raises or yields nothing falls through to
    the next one.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        On success, the page texts joined together, each preceded by a
        ``--- Page N ---`` header. On failure, a human-readable message that
        starts with ``❌`` (missing file, directory given, no library
        installed, backend errors, or no text found). Failures are reported
        through the return value rather than by raising.
    """
    if not os.path.exists(pdf_path):
        return f"❌ Error: File '{pdf_path}' not found!"
    if os.path.isdir(pdf_path):
        # A directory passes the existence check but can never be parsed;
        # report it directly instead of surfacing backend-specific errors.
        return f"❌ Error: '{pdf_path}' is a directory, not a PDF file!"

    if not PDFPLUMBER_AVAILABLE and not PYPDF2_AVAILABLE:
        # The whitespace-only second line is kept as in the original message.
        return (
            "❌ Error: No PDF processing libraries available!\n"
            "        \n"
            "Please install PDF processing libraries:\n"
            "pip install PyPDF2 pdfplumber\n"
            "\n"
            "Then run this script again."
        )

    # Ordered by preference: pdfplumber usually handles formatted text better.
    backends = []
    if PDFPLUMBER_AVAILABLE:
        backends.append(("pdfplumber", _extract_with_pdfplumber))
    if PYPDF2_AVAILABLE:
        backends.append(("PyPDF2", _extract_with_pypdf2))

    errors = []
    for name, extract in backends:
        try:
            print(f"🔄 Trying {name} extraction...")
            extracted_text = extract(pdf_path)
        except Exception as e:
            # Deliberately broad: any backend failure should fall through to
            # the next backend rather than abort the whole extraction.
            errors.append(f"{name} failed: {e}")
            print(f"⚠️  {name} failed: {e}")
            continue

        if extracted_text:
            print(f"✅ Successfully extracted {len(extracted_text)} characters using {name}")
            return extracted_text

    if errors:
        error_lines = "\n".join(f"• {error}" for error in errors)
        return (
            "❌ Error: Could not extract text from PDF!\n"
            "        \n"
            "Errors encountered:\n"
            f"{error_lines}\n"
            "\n"
            "Try:\n"
            "1. Ensuring the PDF is not password-protected\n"
            "2. Converting the PDF to a simpler format\n"
            "3. Copy-pasting text manually from the PDF"
        )

    return "❌ No text could be extracted from this PDF."

def main() -> None:
    """Run the PDF text extractor as an interactive command-line tool.

    The PDF path is taken from the first command-line argument, or prompted
    for when no argument is given. The extracted text is printed together
    with usage instructions, and the user is offered the option of saving it
    to ``<pdf stem>_extracted_text.txt`` in the current directory.

    If extraction fails, only the error message is printed; the instructions
    and the save prompt are skipped so an error is never saved as resume
    text. A closed or exhausted stdin is treated as "no path" at the path
    prompt and as "no" at the save prompt.
    """

    print("🎯 IQKiller PDF Resume Text Extractor")
    print("=" * 50)

    # Check if file path provided as argument
    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    else:
        # Interactive mode
        print("📁 Please provide the path to your resume PDF:")
        print("   Example: /Users/username/Documents/resume.pdf")
        print("   Or drag and drop your PDF file here:")
        try:
            # Drag-and-drop often wraps the path in quotes; strip them.
            pdf_path = input("\nPDF Path: ").strip().strip('"').strip("'")
        except EOFError:
            # stdin closed/redirected: behave as if nothing was entered.
            pdf_path = ""

    if not pdf_path:
        print("❌ No file path provided!")
        return

    # A typed "~/..." path is not expanded by any shell, so expand it here.
    pdf_path = os.path.expanduser(pdf_path)

    # Extract text
    print(f"\n🔄 Processing: {pdf_path}")
    print("-" * 50)

    extracted_text = extract_text_from_pdf(pdf_path)

    # extract_text_from_pdf reports every failure as a message starting with
    # "❌"; successful output starts with a "--- Page N ---" header.
    if extracted_text.startswith("❌"):
        print(f"\n{extracted_text}")
        return

    print("\n" + "=" * 50)
    print("📄 EXTRACTED TEXT")
    print("=" * 50)
    print(extracted_text)
    print("=" * 50)

    # Instructions for use
    print("\n📋 INSTRUCTIONS:")
    print("1. Copy the extracted text above")
    print("2. Open IQKiller in your browser: http://localhost:7860")
    print("3. Paste the text into the 'Resume Text' field")
    print("4. Add your job URL or description")
    print("5. Generate your personalized interview guide!")

    # Save to file option
    try:
        save_option = input("\n💾 Save extracted text to file? (y/n): ").lower().strip()
    except EOFError:
        # Non-interactive run: skip saving instead of crashing at the end.
        save_option = "n"

    if save_option in ['y', 'yes']:
        output_file = Path(pdf_path).stem + "_extracted_text.txt"
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(extracted_text)
            print(f"✅ Text saved to: {output_file}")
        except (OSError, UnicodeError) as e:
            print(f"❌ Could not save file: {e}")

# Run the extractor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()