Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| 🍪 Scribd Cookie Extractor - Valideret Metode | |
| ============================================= | |
| Ekstraherer _scribd_session og _scribd_expire cookies fra Chrome | |
| ved at bruge din eksisterende Google OAuth login. | |
| Metode 1: browser_cookie3 (hurtigst - læser direkte fra Chrome DB) | |
| Metode 2: Direkte læsning af Chromes SQLite database (kun Windows) | |
| Metode 3: Selenium med Chrome profil (fallback - starter browser) | |
| Metode 4: Manuel cookie input (hvis alt andet fejler) | |
| @author WidgeTDC Neural Network | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import sqlite3 | |
| import shutil | |
| import tempfile | |
| from pathlib import Path | |
| from datetime import datetime | |
| from typing import Dict, Optional, Tuple | |
| import base64 | |
| # Attempt imports | |
| try: | |
| import browser_cookie3 | |
| HAS_BROWSER_COOKIE3 = True | |
| except ImportError: | |
| HAS_BROWSER_COOKIE3 = False | |
| try: | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| HAS_SELENIUM = True | |
| except ImportError: | |
| HAS_SELENIUM = False | |
| try: | |
| from Crypto.Cipher import AES | |
| from Crypto.Protocol.KDF import PBKDF2 | |
| import win32crypt | |
| HAS_CRYPTO = True | |
| except ImportError: | |
| HAS_CRYPTO = False | |
class ScribdCookieExtractor:
    """Extract Scribd authentication cookies, trying several strategies in turn.

    ``get_cookies`` tries, in order:

    1. cookies previously saved to ``COOKIE_FILE``;
    2. ``browser_cookie3`` (Chrome, then Edge);
    3. a direct read of Chrome's SQLite cookie store (Windows only);
    4. Selenium driving Chrome with the user's existing profile;
    5. manual entry on stdin.

    The optional third-party modules and the ``HAS_*`` availability flags
    are expected to be defined at file level.
    """

    # Where extracted cookies are cached between runs.
    COOKIE_FILE = Path("data/scribd_harvest/scribd_cookies.json")
    # Cookie names that are reported when found. Only ``_scribd_session``
    # is actually enforced by ``_validate_cookies``.
    REQUIRED_COOKIES = ['_scribd_session', '_scribd_expire']

    def __init__(self):
        """Start with no cookies and make sure the cache directory exists."""
        self.cookies: Dict[str, str] = {}
        self.cookie_file = self.COOKIE_FILE
        self.cookie_file.parent.mkdir(parents=True, exist_ok=True)

    def get_cookies(self) -> Dict[str, str]:
        """Return Scribd cookies, trying every extraction method in order.

        Returns:
            The cookie name -> value mapping of the first method that yields
            a non-empty ``_scribd_session``, or an empty dict if all fail.
            Cookies obtained from any method other than the cache are
            written back to the cache file.
        """
        print("πͺ Scribd Cookie Extractor")
        print("=" * 50)

        # Cached cookies first: cheapest, and needs no browser access.
        if self._load_saved_cookies() and self._validate_cookies():
            print("β Bruger gemte cookies")
            return self.cookies

        # (available?, banner, extractor) for each automatic method.
        automatic_methods = (
            (
                HAS_BROWSER_COOKIE3,
                "\nπ Metode 1: browser_cookie3 (Chrome database)",
                self._extract_with_browser_cookie3,
            ),
            (
                sys.platform == 'win32' and HAS_CRYPTO,
                "\nπ Metode 2: Chrome SQLite direkte lΓ¦sning",
                self._extract_from_chrome_sqlite,
            ),
            (
                HAS_SELENIUM,
                "\nπ Metode 3: Selenium med Chrome profil",
                self._extract_with_selenium,
            ),
        )
        for available, banner, extract in automatic_methods:
            if not available:
                continue
            print(banner)
            if extract() and self._validate_cookies():
                self._save_cookies()
                return self.cookies

        # Last resort: ask the user. _extract_manual only succeeds with a
        # non-empty session value, so no separate validation is needed.
        print("\nπ Metode 4: Manuel cookie input")
        if self._extract_manual():
            self._save_cookies()
            return self.cookies

        print("\nβ Kunne ikke hente cookies")
        return {}

    @staticmethod
    def _is_scribd_host(host: str) -> bool:
        """Return True if *host* is scribd.com or one of its subdomains."""
        bare = host.lstrip('.').lower()
        return bare == 'scribd.com' or bare.endswith('.scribd.com')

    @staticmethod
    def _chrome_user_data_dir(platform: Optional[str] = None,
                              env: Optional[Dict[str, str]] = None) -> Path:
        """Return Chrome's default user-data directory for *platform*.

        Args:
            platform: A ``sys.platform`` value; defaults to the current one.
            env: Environment mapping; defaults to ``os.environ``.

        Only the documented default locations are returned; overrides such
        as ``--user-data-dir`` or ``XDG_CONFIG_HOME`` are not consulted.
        """
        platform = sys.platform if platform is None else platform
        env = os.environ if env is None else env
        if platform == 'win32':
            return Path(env.get('LOCALAPPDATA', '')) / "Google" / "Chrome" / "User Data"
        if platform == 'darwin':
            return Path.home() / "Library" / "Application Support" / "Google" / "Chrome"
        return Path.home() / ".config" / "google-chrome"

    def _extract_with_browser_cookie3(self) -> bool:
        """Method 1: read cookies through ``browser_cookie3``.

        Chrome is tried first; Edge is tried when Chrome fails or does not
        hold a ``_scribd_session`` cookie.

        Returns:
            True if a session cookie was found, otherwise whether any
            cookies are held after at least one browser was read.
        """
        read_any_browser = False
        for label, loader_name in (("Chrome", "chrome"), ("Edge", "edge")):
            try:
                load = getattr(browser_cookie3, loader_name)
                for cookie in load(domain_name=".scribd.com"):
                    self.cookies[cookie.name] = cookie.value
                    if cookie.name in self.REQUIRED_COOKIES:
                        print(f" β Fandt: {cookie.name}")
            except Exception as e:
                # Best effort: a locked or missing store for one browser
                # must not prevent trying the next one.
                print(f" β οΈ {label}: {e}")
                continue
            read_any_browser = True
            if self.cookies.get('_scribd_session'):
                return True
        return read_any_browser and bool(self.cookies)

    def _extract_from_chrome_sqlite(self) -> bool:
        """Method 2: read and decrypt Chrome's SQLite cookie store (Windows).

        The database is copied to a private temporary file before reading,
        and the copy is always removed again.

        Returns:
            True if any cookies are held afterwards, False on any failure.
        """
        try:
            chrome_path = self._chrome_user_data_dir()
            if not chrome_path.exists():
                print(f" β οΈ Chrome profil ikke fundet: {chrome_path}")
                return False

            # Newer layout first, then the legacy location.
            cookies_db = chrome_path / "Default" / "Network" / "Cookies"
            if not cookies_db.exists():
                cookies_db = chrome_path / "Default" / "Cookies"
            if not cookies_db.exists():
                print(f" β οΈ Cookies database ikke fundet")
                return False

            # Without the key no value is decrypted, so skip the copy.
            key = self._get_chrome_encryption_key(chrome_path / "Local State")
            if not key:
                return False

            # mkstemp gives an unpredictable, owner-only file; the cookie
            # store holds credentials and must not land at a guessable path.
            fd, temp_name = tempfile.mkstemp(prefix="scribd_cookies_", suffix=".db")
            os.close(fd)
            temp_db = Path(temp_name)
            try:
                shutil.copy2(cookies_db, temp_db)
                conn = sqlite3.connect(str(temp_db))
                try:
                    rows = conn.execute(
                        "SELECT name, encrypted_value, host_key "
                        "FROM cookies "
                        "WHERE host_key LIKE '%scribd.com%'"
                    ).fetchall()
                finally:
                    conn.close()
            finally:
                temp_db.unlink(missing_ok=True)

            for name, encrypted_value, host in rows:
                # The LIKE pattern also matches hosts such as
                # "notscribd.com"; keep only real scribd.com hosts.
                if not self._is_scribd_host(host):
                    continue
                decrypted = self._decrypt_chrome_cookie(encrypted_value, key)
                if decrypted:
                    self.cookies[name] = decrypted
                    if name in self.REQUIRED_COOKIES:
                        print(f" β Dekrypteret: {name}")

            return bool(self.cookies)
        except Exception as e:
            print(f" β SQLite lΓ¦sning fejlede: {e}")
            return False

    def _get_chrome_encryption_key(self, local_state_path: Path) -> Optional[bytes]:
        """Return Chrome's cookie encryption key, or None if unavailable.

        Reads ``os_crypt.encrypted_key`` from the ``Local State`` JSON file,
        base64-decodes it, drops the 5-byte "DPAPI" prefix and unwraps the
        rest with Windows DPAPI.
        """
        try:
            with open(local_state_path, 'r', encoding='utf-8') as f:
                local_state = json.load(f)
            encrypted_key = base64.b64decode(
                local_state['os_crypt']['encrypted_key']
            )
            # Strip the "DPAPI" prefix.
            encrypted_key = encrypted_key[5:]
            return win32crypt.CryptUnprotectData(encrypted_key, None, None, None, 0)[1]
        except Exception as e:
            print(f" β οΈ Kunne ikke hente encryption key: {e}")
            return None

    def _decrypt_chrome_cookie(self, encrypted_value: bytes, key: bytes) -> Optional[str]:
        """Decrypt one Chrome cookie value; return None if it cannot be read.

        Values prefixed ``v10``/``v11`` are AES-GCM: 3-byte prefix, 12-byte
        nonce, ciphertext, 16-byte authentication tag. Anything else is
        treated as a legacy DPAPI blob.
        """
        # NOTE(review): newer Chrome releases reportedly prepend a domain
        # hash to the plaintext and add an app-bound "v20" scheme; neither
        # is handled here - confirm against the Chrome version in use.
        try:
            if encrypted_value[:3] in (b'v10', b'v11'):
                nonce = encrypted_value[3:15]
                ciphertext = encrypted_value[15:-16]
                tag = encrypted_value[-16:]
                cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
                # Verify the tag rather than discarding it, so a wrong key
                # or corrupt value yields None instead of garbage.
                return cipher.decrypt_and_verify(ciphertext, tag).decode('utf-8')
            # Legacy DPAPI encryption.
            return win32crypt.CryptUnprotectData(
                encrypted_value, None, None, None, 0
            )[1].decode('utf-8')
        except Exception:
            # Per-cookie best effort: crypto, DPAPI and decoding errors all
            # mean "this value is unusable".
            return None

    def _extract_with_selenium(self) -> bool:
        """Method 3: open Chrome with the existing profile and read cookies.

        Returns:
            True if any cookies are held afterwards, False if the profile
            is missing or Selenium fails.
        """
        try:
            chrome_profile = self._chrome_user_data_dir()
            if not chrome_profile.exists():
                # Launching anyway would create an empty, logged-out profile.
                print(f" β οΈ Chrome profil ikke fundet: {chrome_profile}")
                return False

            options = Options()
            options.add_argument(f"--user-data-dir={chrome_profile}")
            options.add_argument("--profile-directory=Default")
            options.add_argument("--no-first-run")
            options.add_argument("--no-default-browser-check")
            # Deliberately NOT headless: the existing login session is reused.

            print(" π Γ bner Chrome med din profil...")
            print(" β³ Venter pΓ₯ at Scribd loader...")
            driver = webdriver.Chrome(options=options)
            try:
                # Visit Scribd so its cookies are present in the session.
                driver.get("https://www.scribd.com/")
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.TAG_NAME, "body"))
                )
                for cookie in driver.get_cookies():
                    self.cookies[cookie['name']] = cookie['value']
                    if cookie['name'] in self.REQUIRED_COOKIES:
                        print(f" β Fandt: {cookie['name']}")
                return bool(self.cookies)
            finally:
                driver.quit()
        except Exception as e:
            print(f" β Selenium fejlede: {e}")
            return False

    def _extract_manual(self) -> bool:
        """Method 4: prompt the user to paste the cookie values.

        Returns:
            True if a non-empty ``_scribd_session`` was entered; False if it
            was left empty or input was aborted (EOF / Ctrl+C).
        """
        print("""
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β MANUEL COOKIE EXTRACTION β
β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£
β 1. Γ bn Chrome og gΓ₯ til https://www.scribd.com β
β 2. Log ind med din Google konto β
β 3. Tryk F12 for Developer Tools β
β 4. GΓ₯ til Application β Cookies β scribd.com β
β 5. Find og kopier vΓ¦rdierne for: β
β - _scribd_session β
β - _scribd_expire β
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
""")
        try:
            session = input("Indtast _scribd_session værdi: ").strip()
            if not session:
                return False
            expire = input("Indtast _scribd_expire værdi (tryk Enter hvis tom): ").strip()
        except (EOFError, KeyboardInterrupt, OSError):
            # Closed stdin or user abort counts as "no manual input".
            return False

        self.cookies['_scribd_session'] = session
        if expire:
            self.cookies['_scribd_expire'] = expire
        return True

    def _validate_cookies(self) -> bool:
        """Return True if a non-empty ``_scribd_session`` cookie is held."""
        if self.cookies.get('_scribd_session'):
            print(" β _scribd_session fundet")
            return True
        print(" β οΈ Mangler _scribd_session cookie")
        return False

    def _save_cookies(self):
        """Write the current cookies, a timestamp and a source tag as JSON."""
        data = {
            'cookies': self.cookies,
            'extracted_at': datetime.now().isoformat(),
            'source': 'ScribdCookieExtractor'
        }
        # Harmless if the directory already exists; keeps saving robust
        # when cookie_file was repointed after construction.
        self.cookie_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.cookie_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f" πΎ Cookies gemt til: {self.cookie_file}")

    def _load_saved_cookies(self) -> bool:
        """Load cookies from the cache file.

        Returns:
            True if a non-empty cookie mapping was loaded. A missing,
            unreadable or malformed file returns False and leaves
            ``self.cookies`` untouched.
        """
        if not self.cookie_file.exists():
            return False
        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            return False

        if not isinstance(data, dict):
            return False
        saved = data.get('cookies', {})
        if not isinstance(saved, dict):
            return False

        self.cookies = saved
        extracted_at = data.get('extracted_at', 'unknown')
        print(f" π Loaded cookies fra: {extracted_at}")
        return bool(self.cookies)
def main():
    """Run the extractor once and print a summary of the outcome."""
    found = ScribdCookieExtractor().get_cookies()

    # Failure path first: print the troubleshooting hints and stop.
    if not found:
        print("\nβ FEJL - Kunne ikke hente cookies")
        print(" PrΓΈv at:")
        print(" 1. Luk alle Chrome vinduer")
        print(" 2. KΓΈr scriptet igen")
        print(" 3. Eller brug manuel input")
        return

    rule = "=" * 50
    print("\n" + rule)
    print("β SUCCESS - Cookies ekstraheret!")
    print(rule)
    for name in ('_scribd_session', '_scribd_expire'):
        if name not in found:
            continue
        value = found[name]
        # Long values are shown truncated to their first 50 characters.
        if len(value) > 50:
            print(f" {name}: {value[:50]}...")
        else:
            print(f" {name}: {value}")
# Run the extractor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()