Spaces:

rastof9
/

fb

Runtime error

App Files Files Community

rastof9 committed on Mar 9, 2025

Commit

092e58d

1 Parent(s): d488241

Saving local changes before rebase

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.env +9 -0
.env.example +5 -2
app.py +12 -2
app/__init__.py +34 -16
app/__pycache__/__init__.cpython-312.pyc +0 -0
app/models/google_ad.py +51 -0
app/routes/compliance.py +8 -8
app/routes/google_ads.py +188 -0
app/services/ai_processor.py +9 -2
app/services/google_scraper.py +172 -0
app/templates/base.html +43 -11
app/templates/dashboard.html +85 -21
app/templates/google_ads/display.html +95 -0
app/templates/google_ads/index.html +49 -0
app/templates/google_ads/results.html +252 -0
app/templates/google_ads/search.html +80 -0
app/utils/decorators.py +1 -1
config.py +36 -4
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER +0 -1
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE +0 -20
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA +0 -46
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD +0 -43
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL +0 -5
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt +0 -2
hf_env/Lib/site-packages/__pycache__/typing_extensions.cpython-312.pyc +0 -0
hf_env/Lib/site-packages/_yaml/__init__.py +0 -33
hf_env/Lib/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc +0 -0
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER +0 -1
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE +0 -20
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA +0 -77
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD +0 -14
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL +0 -5
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt +0 -1
hf_env/Lib/site-packages/certifi/__init__.py +0 -4
hf_env/Lib/site-packages/certifi/__main__.py +0 -12
hf_env/Lib/site-packages/certifi/__pycache__/__init__.cpython-312.pyc +0 -0
hf_env/Lib/site-packages/certifi/__pycache__/__main__.cpython-312.pyc +0 -0
hf_env/Lib/site-packages/certifi/__pycache__/core.cpython-312.pyc +0 -0
hf_env/Lib/site-packages/certifi/cacert.pem +0 -0
hf_env/Lib/site-packages/certifi/core.py +0 -114
hf_env/Lib/site-packages/certifi/py.typed +0 -0
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER +0 -1
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE +0 -21
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA +0 -721
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD +0 -35
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL +0 -5
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt +0 -2
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt +0 -1
hf_env/Lib/site-packages/charset_normalizer/__init__.py +0 -48
hf_env/Lib/site-packages/charset_normalizer/__main__.py +0 -6

.env ADDED Viewed

	@@ -0,0 +1,9 @@

+FLASK_APP=app.py
+FLASK_ENV=development
+SECRET_KEY=your-secret-key-here
+DATABASE_URL=postgresql://user:password@localhost:5432/facebook_ads
+CELERY_BROKER_URL=redis://localhost:6379/0
+CELERY_RESULT_BACKEND=redis://localhost:6379/0
+OPENAI_API_KEY=your-openai-api-key-here
+INSTANCE_PATH=/tmp/instance
+SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub

.env.example CHANGED Viewed

@@ -1,6 +1,9 @@
 FLASK_APP=app.py
 FLASK_ENV=development
 SECRET_KEY=your-secret-key-here
-DATABASE_URL=postgresql://user:password@localhost:5432/dbname
-REDIS_URL=redis://localhost:6379/0
 SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub

 FLASK_APP=app.py
 FLASK_ENV=development
 SECRET_KEY=your-secret-key-here
+DATABASE_URL=postgresql://user:password@localhost:5432/facebook_ads
+CELERY_BROKER_URL=redis://localhost:6379/0
+CELERY_RESULT_BACKEND=redis://localhost:6379/0
+OPENAI_API_KEY=your-openai-api-key-here
+INSTANCE_PATH=/tmp/instance
 SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub

app.py CHANGED Viewed

@@ -1,6 +1,16 @@
 from flask import Flask
 def create_app():
-    app = Flask(__name__)
-    app.config['INSTANCE_PATH'] = '/tmp/instance'  # Ensure this path exists
     return app

 from flask import Flask
+from flask_migrate import Migrate
+from app import db, create_app as create_flask_app
+from config import get_config
+migrate = Migrate()
 def create_app():
+    app = create_flask_app()
+    app.config.from_object(get_config())
+    migrate.init_app(app, db)
     return app
+if __name__ == "__main__":
+    app = create_app()
+    app.run(debug=True)

app/__init__.py CHANGED Viewed

@@ -3,35 +3,53 @@ from flask_sqlalchemy import SQLAlchemy
 from flask_login import LoginManager
 from celery import Celery
 import redis
 db = SQLAlchemy()
 login = LoginManager()
 celery = Celery(__name__)
-cache = redis.Redis()
-def create_app():
-    # Create the Flask app first
     app = Flask(__name__)
     # Load configuration
-    app.config.from_object('config.Config')
-    # Set the instance path after loading the config
     app.instance_path = app.config['INSTANCE_PATH']
     # Initialize extensions
     db.init_app(app)
     login.init_app(app)
     celery.conf.update(app.config)
     # Register Blueprints
-    from .routes.auth import auth_bp
-    from .routes.dashboard import dashboard_bp
-    from .routes.api import api_bp
-    from .routes.compliance import compliance_bp
-    app.register_blueprint(auth_bp)
-    app.register_blueprint(dashboard_bp)
-    app.register_blueprint(api_bp)
-    app.register_blueprint(compliance_bp)
     return app

 from flask_login import LoginManager
 from celery import Celery
 import redis
+import os
+from pathlib import Path
+# Initialize extensions
 db = SQLAlchemy()
 login = LoginManager()
+login.login_view = 'auth.login'
 celery = Celery(__name__)
+cache = None  # Initialize later when app context is available
+def create_app(config_class=None):
+    # Create the Flask app
     app = Flask(__name__)
     # Load configuration
+    if config_class is None:
+        app.config.from_object('config.Config')
+    else:
+        app.config.from_object(config_class)
+    # Ensure instance path exists
+    Path(app.config['INSTANCE_PATH']).mkdir(parents=True, exist_ok=True)
     app.instance_path = app.config['INSTANCE_PATH']
     # Initialize extensions
     db.init_app(app)
     login.init_app(app)
+    # Configure Celery
     celery.conf.update(app.config)
+    # Initialize Redis cache
+    global cache
+    cache = redis.Redis.from_url(app.config['CELERY_BROKER_URL'])
     # Register Blueprints
+    with app.app_context():
+        from .routes.auth import auth_bp
+        from .routes.dashboard import dashboard_bp
+        from .routes.api import api_bp
+        from .routes.compliance import compliance_bp
+        from .routes.google_ads import google_ads_bp
+        app.register_blueprint(auth_bp)
+        app.register_blueprint(dashboard_bp)
+        app.register_blueprint(api_bp)
+        app.register_blueprint(compliance_bp)
+        app.register_blueprint(google_ads_bp)
     return app

app/__pycache__/__init__.cpython-312.pyc CHANGED Viewed

Binary files a/app/__pycache__/__init__.cpython-312.pyc and b/app/__pycache__/__init__.cpython-312.pyc differ

app/models/google_ad.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from app import db
+from datetime import datetime
+import uuid
+class GoogleAd(db.Model):
+    """Model for storing Google Ads data."""
+    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
+    ad_type = db.Column(db.String(20), nullable=False)  # 'search' or 'display'
+    title = db.Column(db.String(255), nullable=True)
+    description = db.Column(db.Text, nullable=True)
+    display_url = db.Column(db.String(255), nullable=True)
+    target_url = db.Column(db.String(512), nullable=True)
+    image_url = db.Column(db.String(512), nullable=True)
+    position = db.Column(db.Integer, nullable=True)
+    search_query = db.Column(db.String(255), nullable=True)
+    page_url = db.Column(db.String(512), nullable=True)
+    raw_data = db.Column(db.JSON, nullable=True)
+    sentiment = db.Column(db.JSON, nullable=True)
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
+    def __repr__(self):
+        return f'<GoogleAd {self.id} - {self.title}>'
+    @classmethod
+    def from_search_ad_data(cls, ad_data, search_query, user_id=None):
+        """Create a GoogleAd instance from scraped search ad data."""
+        return cls(
+            ad_type='search',
+            title=ad_data.get('title'),
+            description=ad_data.get('description'),
+            display_url=ad_data.get('display_url'),
+            target_url=ad_data.get('target_url'),
+            position=ad_data.get('position'),
+            search_query=search_query,
+            raw_data=ad_data,
+            user_id=user_id
+        )
+    @classmethod
+    def from_display_ad_data(cls, ad_data, user_id=None):
+        """Create a GoogleAd instance from scraped display ad data."""
+        return cls(
+            ad_type='display',
+            image_url=ad_data.get('image_url'),
+            target_url=ad_data.get('target_url'),
+            page_url=ad_data.get('page_url'),
+            raw_data=ad_data,
+            user_id=user_id
+        )

app/routes/compliance.py CHANGED Viewed

@@ -3,15 +3,11 @@ from flask_login import login_required
 from ..models import Ad
 from ..utils.decorators import admin_required
 from .. import db
-compliance_bp = Blueprint('compliance', __name__)
-@compliance_bp.route('/report')
-@login_required
-@admin_required
-def compliance_report():
-    ads = Ad.query.all()
-    return render_template('compliance_report.html', ads=ads)
 @compliance_bp.route('/anonymize/<ad_id>', methods=['POST'])
 @login_required
@@ -20,8 +16,12 @@ def anonymize_ad(ad_id):
     try:
         ad = Ad.query.get_or_404(ad_id)
         ad.content = "REDACTED"
         db.session.commit()
         return jsonify({'status': 'success'})
     except Exception as e:
         db.session.rollback()
-        return jsonify({'status': 'error', 'message': str(e)}), 500

 from ..models import Ad
 from ..utils.decorators import admin_required
 from .. import db
+import logging
+logger = logging.getLogger(__name__)
+compliance_bp = Blueprint('compliance', __name__)
 @compliance_bp.route('/anonymize/<ad_id>', methods=['POST'])
 @login_required
     try:
         ad = Ad.query.get_or_404(ad_id)
         ad.content = "REDACTED"
+        db.session.add(ad)
         db.session.commit()
         return jsonify({'status': 'success'})
     except Exception as e:
         db.session.rollback()
+        logger.error(f"Error anonymizing ad {ad_id}: {str(e)}")
+        return jsonify({'status': 'error', 'message': str(e)}), 500
+    finally:
+        db.session.close()

app/routes/google_ads.py ADDED Viewed

	@@ -0,0 +1,188 @@

+from flask import Blueprint, render_template, request, jsonify, current_app
+from flask_login import login_required, current_user
+from app.services.google_scraper import GoogleAdsScraper
+from app.models.google_ad import GoogleAd
+from app.services.ai_processor import AIPipeline
+from app import db, celery
+import logging
+logger = logging.getLogger(__name__)
+google_ads_bp = Blueprint('google_ads', __name__, url_prefix='/google-ads')
+@google_ads_bp.route('/', methods=['GET'])
+@login_required
+def index():
+    """Google Ads dashboard page."""
+    return render_template('google_ads/index.html')
+@google_ads_bp.route('/search', methods=['GET', 'POST'])
+@login_required
+def search_ads():
+    """Search for Google Ads."""
+    if request.method == 'POST':
+        search_query = request.form.get('query')
+        num_pages = int(request.form.get('num_pages', 3))
+        # Start async task for scraping
+        task = scrape_google_search_ads.delay(search_query, num_pages, current_user.id)
+        return jsonify({
+            'status': 'success',
+            'message': 'Google Ads scraping started',
+            'task_id': task.id
+        })
+    # GET request - show search form
+    return render_template('google_ads/search.html')
+@google_ads_bp.route('/display', methods=['GET', 'POST'])
+@login_required
+def display_ads():
+    """Scrape display ads from a URL."""
+    if request.method == 'POST':
+        target_url = request.form.get('url')
+        scroll_count = int(request.form.get('scroll_count', 5))
+        # Start async task for scraping
+        task = scrape_google_display_ads.delay(target_url, scroll_count, current_user.id)
+        return jsonify({
+            'status': 'success',
+            'message': 'Google Display Ads scraping started',
+            'task_id': task.id
+        })
+    # GET request - show form
+    return render_template('google_ads/display.html')
+@google_ads_bp.route('/results', methods=['GET'])
+@login_required
+def view_results():
+    """View Google Ads results."""
+    ad_type = request.args.get('type', 'all')
+    query = request.args.get('query', '')
+    # Build query
+    ads_query = GoogleAd.query
+    if ad_type != 'all':
+        ads_query = ads_query.filter(GoogleAd.ad_type == ad_type)
+    if query:
+        ads_query = ads_query.filter(
+            (GoogleAd.title.ilike(f'%{query}%')) |
+            (GoogleAd.description.ilike(f'%{query}%')) |
+            (GoogleAd.search_query.ilike(f'%{query}%'))
+        )
+    # Get results
+    ads = ads_query.order_by(GoogleAd.created_at.desc()).all()
+    return render_template('google_ads/results.html', ads=ads, ad_type=ad_type, query=query)
+@google_ads_bp.route('/api/ads', methods=['GET'])
+@login_required
+def api_get_ads():
+    """API endpoint to get Google Ads data."""
+    ad_type = request.args.get('type', 'all')
+    query = request.args.get('query', '')
+    limit = int(request.args.get('limit', 50))
+    # Build query
+    ads_query = GoogleAd.query
+    if ad_type != 'all':
+        ads_query = ads_query.filter(GoogleAd.ad_type == ad_type)
+    if query:
+        ads_query = ads_query.filter(
+            (GoogleAd.title.ilike(f'%{query}%')) |
+            (GoogleAd.description.ilike(f'%{query}%')) |
+            (GoogleAd.search_query.ilike(f'%{query}%'))
+        )
+    # Get results
+    ads = ads_query.order_by(GoogleAd.created_at.desc()).limit(limit).all()
+    # Convert to JSON
+    result = []
+    for ad in ads:
+        ad_data = {
+            'id': ad.id,
+            'ad_type': ad.ad_type,
+            'title': ad.title,
+            'description': ad.description,
+            'display_url': ad.display_url,
+            'target_url': ad.target_url,
+            'image_url': ad.image_url,
+            'position': ad.position,
+            'search_query': ad.search_query,
+            'page_url': ad.page_url,
+            'sentiment': ad.sentiment,
+            'created_at': ad.created_at.isoformat() if ad.created_at else None
+        }
+        result.append(ad_data)
+    return jsonify(result)
+@celery.task
+def scrape_google_search_ads(search_query, num_pages, user_id):
+    """Celery task to scrape Google search ads."""
+    try:
+        scraper = GoogleAdsScraper()
+        ads_data = scraper.scrape_search_ads(search_query, num_pages)
+        # Process and store ads
+        ai_pipeline = AIPipeline()
+        for ad_data in ads_data:
+            # Create GoogleAd instance
+            ad = GoogleAd.from_search_ad_data(ad_data, search_query, user_id)
+            # Process with AI if there's content
+            if ad.title or ad.description:
+                try:
+                    # Create a simple object with content for AI processing
+                    ad_content = type('obj', (object,), {
+                        'content': f"{ad.title} {ad.description}"
+                    })
+                    # Process with AI
+                    ai_results = ai_pipeline.process_ad(ad_content)
+                    ad.sentiment = ai_results.get('sentiment')
+                except Exception as e:
+                    logger.error(f"Error processing ad with AI: {e}")
+            # Save to database
+            db.session.add(ad)
+        db.session.commit()
+        return {'status': 'success', 'count': len(ads_data)}
+    except Exception as e:
+        logger.error(f"Error in Google search ads scraping task: {e}")
+        db.session.rollback()
+        return {'status': 'error', 'message': str(e)}
+@celery.task
+def scrape_google_display_ads(target_url, scroll_count, user_id):
+    """Celery task to scrape Google display ads."""
+    try:
+        scraper = GoogleAdsScraper()
+        ads_data = scraper.scrape_display_ads(target_url, scroll_count)
+        # Process and store ads
+        for ad_data in ads_data:
+            # Create GoogleAd instance
+            ad = GoogleAd.from_display_ad_data(ad_data, user_id)
+            # Save to database
+            db.session.add(ad)
+        db.session.commit()
+        return {'status': 'success', 'count': len(ads_data)}
+    except Exception as e:
+        logger.error(f"Error in Google display ads scraping task: {e}")
+        db.session.rollback()
+        return {'status': 'error', 'message': str(e)}

app/services/ai_processor.py CHANGED Viewed

@@ -6,6 +6,10 @@ import logging
 logger = logging.getLogger(__name__)
 class AIPipeline:
     def __init__(self):
         try:
@@ -26,6 +30,9 @@ class AIPipeline:
             raise
     def process_ad(self, ad):
         try:
             results = {
                 "sentiment": self._analyze_sentiment(ad.content),
@@ -34,8 +41,8 @@ class AIPipeline:
             }
             return results
         except Exception as e:
-            logger.error(f"Error processing ad: {e}")
-            return {"error": str(e)}
     def _analyze_sentiment(self, text):
         try:

 logger = logging.getLogger(__name__)
+class ProcessingError(Exception):
+    """Exception raised when ad processing fails."""
+    pass
 class AIPipeline:
     def __init__(self):
         try:
             raise
     def process_ad(self, ad):
+        if not ad:
+            raise ValueError("Ad content cannot be empty")
         try:
             results = {
                 "sentiment": self._analyze_sentiment(ad.content),
             }
             return results
         except Exception as e:
+            logger.error(f"Error processing ad: {str(e)}")
+            raise ProcessingError(f"Failed to process ad: {str(e)}")
     def _analyze_sentiment(self, text):
         try:

app/services/google_scraper.py ADDED Viewed

	@@ -0,0 +1,172 @@

+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+import time
+from selenium.common.exceptions import TimeoutException, WebDriverException
+from contextlib import contextmanager
+import logging
+import json
+import os
+logger = logging.getLogger(__name__)
+class GoogleAdsScraper:
+    def __init__(self, selenium_hub_url=None):
+        self.driver = None
+        self.selenium_hub_url = selenium_hub_url or os.getenv('SELENIUM_HUB_URL')
+    def _setup_driver(self):
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        if self.selenium_hub_url:
+            logger.info(f"Using Selenium Hub at {self.selenium_hub_url}")
+            return webdriver.Remote(
+                command_executor=self.selenium_hub_url,
+                options=options
+            )
+        else:
+            return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+    @contextmanager
+    def _get_driver(self):
+        try:
+            self.driver = self._setup_driver()
+            yield self.driver
+        finally:
+            if self.driver:
+                self.driver.quit()
+    def scrape_search_ads(self, search_query, num_pages=3):
+        """Scrape Google search ads for a given query."""
+        with self._get_driver() as driver:
+            try:
+                url = f"https://www.google.com/search?q={search_query}"
+                driver.get(url)
+                driver.implicitly_wait(5)
+                ads = []
+                # Process first page
+                ads.extend(self._extract_search_ads(driver))
+                # Navigate through additional pages if requested
+                for page in range(2, num_pages + 1):
+                    try:
+                        next_button = driver.find_element(By.ID, "pnnext")
+                        next_button.click()
+                        time.sleep(2)
+                        ads.extend(self._extract_search_ads(driver))
+                    except Exception as e:
+                        logger.warning(f"Could not navigate to page {page}: {e}")
+                        break
+                return ads
+            except (TimeoutException, WebDriverException) as e:
+                logger.error(f"Error during Google Ads scraping: {e}")
+                return []
+    def _extract_search_ads(self, driver):
+        """Extract ad data from the current search results page."""
+        ads = []
+        try:
+            # Look for ad containers
+            ad_elements = driver.find_elements(By.CSS_SELECTOR, "div.uEierd")
+            for ad in ad_elements:
+                try:
+                    ad_data = {}
+                    # Extract ad title
+                    title_element = ad.find_element(By.CSS_SELECTOR, "div.CCgQ5.vCa9Yd.QfkTvb.MUxGbd.v0nnCb")
+                    ad_data["title"] = title_element.text if title_element else ""
+                    # Extract ad description
+                    desc_element = ad.find_element(By.CSS_SELECTOR, "div.MUxGbd.yDYNvb.lyLwlc")
+                    ad_data["description"] = desc_element.text if desc_element else ""
+                    # Extract ad URL
+                    url_element = ad.find_element(By.CSS_SELECTOR, "a.sVXRqc")
+                    ad_data["display_url"] = url_element.text if url_element else ""
+                    ad_data["target_url"] = url_element.get_attribute("href") if url_element else ""
+                    # Extract ad position
+                    ad_data["position"] = len(ads) + 1
+                    # Add timestamp
+                    ad_data["scrape_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
+                    ads.append(ad_data)
+                except Exception as e:
+                    logger.warning(f"Error extracting ad data: {e}")
+                    continue
+            return ads
+        except Exception as e:
+            logger.error(f"Error extracting search ads: {e}")
+            return []
+    def scrape_display_ads(self, target_url, scroll_count=5):
+        """Scrape Google display ads from a specific page."""
+        with self._get_driver() as driver:
+            try:
+                driver.get(target_url)
+                driver.implicitly_wait(5)
+                # Scroll to load dynamic content
+                for _ in range(scroll_count):
+                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+                    time.sleep(2)
+                # Extract iframe ads
+                iframes = driver.find_elements(By.CSS_SELECTOR, "iframe[id^='google_ads_iframe']")
+                ads = []
+                for iframe in iframes:
+                    try:
+                        # Switch to iframe context
+                        driver.switch_to.frame(iframe)
+                        # Extract ad data
+                        ad_data = {
+                            "iframe_id": iframe.get_attribute("id"),
+                            "width": iframe.get_attribute("width"),
+                            "height": iframe.get_attribute("height"),
+                            "scrape_time": time.strftime("%Y-%m-%d %H:%M:%S"),
+                            "page_url": target_url
+                        }
+                        # Try to get the ad image
+                        try:
+                            img = driver.find_element(By.CSS_SELECTOR, "img")
+                            ad_data["image_url"] = img.get_attribute("src")
+                        except:
+                            ad_data["image_url"] = None
+                        # Try to get the ad destination
+                        try:
+                            link = driver.find_element(By.CSS_SELECTOR, "a")
+                            ad_data["target_url"] = link.get_attribute("href")
+                        except:
+                            ad_data["target_url"] = None
+                        ads.append(ad_data)
+                        # Switch back to main content
+                        driver.switch_to.default_content()
+                    except Exception as e:
+                        logger.warning(f"Error processing iframe: {e}")
+                        driver.switch_to.default_content()
+                        continue
+                return ads
+            except (TimeoutException, WebDriverException) as e:
+                logger.error(f"Error during Google Display Ads scraping: {e}")
+                return []

app/templates/base.html CHANGED Viewed

@@ -3,23 +3,55 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Facebook Ad Analytics</title>
     <link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
 </head>
 <body>
-    <header>
-        <h1>Facebook Ad Analytics</h1>
-        <nav>
-            <a href="{{ url_for('dashboard.index') }}">Dashboard</a>
-            <a href="{{ url_for('compliance.compliance_report') }}">Compliance</a>
-            <a href="{{ url_for('auth.logout') }}">Logout</a>
-        </nav>
-    </header>
     <main>
         {% block content %}{% endblock %}
     </main>
-    <footer>
-        <p>&copy; 2023 Facebook Ad Analytics</p>
     </footer>
 </body>
 </html>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}Facebook Ad Analytics{% endblock %}</title>
+    <!-- Bootstrap CSS -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
+    <!-- Custom CSS -->
     <link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
+    {% block head_extra %}{% endblock %}
 </head>
 <body>
+    <nav class="navbar navbar-expand-lg navbar-dark bg-dark">
+        <div class="container">
+            <a class="navbar-brand" href="{{ url_for('dashboard.index') }}">Ad Analytics</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav me-auto">
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('dashboard.index') }}">Dashboard</a>
+                    </li>
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
+                    </li>
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('compliance.compliance_report') }}">Compliance</a>
+                    </li>
+                </ul>
+                <ul class="navbar-nav">
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('auth.logout') }}">Logout</a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
     <main>
         {% block content %}{% endblock %}
     </main>
+    <footer class="bg-dark text-white text-center py-3 mt-5">
+        <div class="container">
+            <p class="mb-0">&copy; 2023 Ad Analytics Platform</p>
+        </div>
     </footer>
+    <!-- Bootstrap Bundle with Popper -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
+    <!-- jQuery -->
+    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+    {% block scripts %}{% endblock %}
 </body>
 </html>

app/templates/dashboard.html CHANGED Viewed

@@ -1,30 +1,94 @@
 {% extends "base.html" %}
 {% block content %}
-<div class="filters">
-    <input type="text" name="query" placeholder="Search ads..." value="{{ query }}">
-    <select name="sentiment">
-        <option value="">All Sentiments</option>
-        <option value="Positive" {% if sentiment_filter == "Positive" %}selected{% endif %}>Positive</option>
-        <option value="Negative" {% if sentiment_filter == "Negative" %}selected{% endif %}>Negative</option>
-    </select>
-    <button type="button" onclick="applyFilters()">Apply</button>
-</div>
-<div class="ads-list">
-    {% for ad in ads.items %}
-    <div class="ad-card">
-        <p>{{ ad.content }}</p>
-        <span class="sentiment">{{ ad.sentiment }}</span>
     </div>
-    {% endfor %}
-</div>
-<div class="pagination">
-    {% for p in range(1, ads.pages + 1) %}
-    <a href="?page={{ p }}&query={{ query }}&sentiment={{ sentiment_filter }}"
-       class="{% if p == ads.page %}active{% endif %}">{{ p }}</a>
-    {% endfor %}
 </div>
 <script>

 {% extends "base.html" %}
 {% block content %}
+<div class="container mt-4">
+    <h1 class="mb-4">Facebook Ad Analytics Dashboard</h1>
+    <div class="row mb-4">
+        <div class="col-md-4">
+            <div class="card">
+                <div class="card-header bg-primary text-white">
+                    <h5 class="card-title mb-0">Facebook Ads</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">View and analyze Facebook ads.</p>
+                    <a href="#" class="btn btn-primary">View Ads</a>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-4">
+            <div class="card">
+                <div class="card-header bg-success text-white">
+                    <h5 class="card-title mb-0">Google Ads</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">Scrape and analyze Google ads.</p>
+                    <a href="{{ url_for('google_ads.index') }}" class="btn btn-success">Google Ads</a>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-4">
+            <div class="card">
+                <div class="card-header bg-info text-white">
+                    <h5 class="card-title mb-0">Compliance</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">Generate compliance reports.</p>
+                    {# Link to the same endpoint the navbar in base.html uses instead of a dead "#" anchor. #}
+                    <a href="{{ url_for('compliance.compliance_report') }}" class="btn btn-info">Compliance</a>
+                </div>
+            </div>
+        </div>
+    </div>
+    <div class="filters mb-4">
+        <div class="card">
+            <div class="card-header">
+                <h5 class="card-title mb-0">Filter Ads</h5>
+            </div>
+            <div class="card-body">
+                <div class="row">
+                    <div class="col-md-5">
+                        <input type="text" class="form-control" name="query" placeholder="Search ads..." value="{{ query }}">
+                    </div>
+                    <div class="col-md-5">
+                        <select class="form-select" name="sentiment">
+                            <option value="">All Sentiments</option>
+                            <option value="Positive" {% if sentiment_filter == "Positive" %}selected{% endif %}>Positive</option>
+                            <option value="Negative" {% if sentiment_filter == "Negative" %}selected{% endif %}>Negative</option>
+                        </select>
+                    </div>
+                    <div class="col-md-2">
+                        <button type="button" class="btn btn-primary w-100" onclick="applyFilters()">Apply</button>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+    <div class="ads-list">
+        {% for ad in ads.items %}
+        <div class="card mb-3">
+            <div class="card-body">
+                <p class="card-text">{{ ad.content }}</p>
+                <span class="badge {% if ad.sentiment == 'Positive' %}bg-success{% elif ad.sentiment == 'Negative' %}bg-danger{% else %}bg-secondary{% endif %}">
+                    {{ ad.sentiment }}
+                </span>
+            </div>
+        </div>
+        {% endfor %}
     </div>
+    <nav aria-label="Page navigation">
+        <ul class="pagination justify-content-center">
+            {# Filter values are URL-encoded so characters such as "&", "#" or spaces in a search do not break the page links. #}
+            {% for p in range(1, ads.pages + 1) %}
+            <li class="page-item {% if p == ads.page %}active{% endif %}">
+                <a class="page-link" href="?page={{ p }}&query={{ query|urlencode }}&sentiment={{ sentiment_filter|urlencode }}">{{ p }}</a>
+            </li>
+            {% endfor %}
+        </ul>
+    </nav>
 </div>
 <script>

app/templates/google_ads/display.html ADDED Viewed

	@@ -0,0 +1,95 @@

+{% extends "base.html" %}
+{% block title %}Google Display Ads Scraper{% endblock %}
+{% block content %}
+<div class="container mt-4">
+    <nav aria-label="breadcrumb">
+        <ol class="breadcrumb">
+            <li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
+            <li class="breadcrumb-item active" aria-current="page">Display Ads</li>
+        </ol>
+    </nav>
+    <h1 class="mb-4">Google Display Ads Scraper</h1>
+    <div class="card">
+        <div class="card-header bg-success text-white">
+            <h5 class="card-title mb-0">Scrape Display Ads from a Website</h5>
+        </div>
+        <div class="card-body">
+            <form id="display-form" method="post">
+                <div class="mb-3">
+                    <label for="url" class="form-label">Target URL</label>
+                    <input type="url" class="form-control" id="url" name="url" required
+                           placeholder="https://example.com">
+                    <div class="form-text">Enter a website URL that displays Google Ads.</div>
+                </div>
+                <div class="mb-3">
+                    <label for="scroll_count" class="form-label">Scroll Count</label>
+                    <input type="number" class="form-control" id="scroll_count" name="scroll_count"
+                           value="5" min="1" max="20">
+                    <div class="form-text">How many times to scroll the page to load dynamic content (1-20).</div>
+                </div>
+                <button type="submit" class="btn btn-success" id="submit-btn">Start Scraping</button>
+            </form>
+            <div id="result-container" class="mt-4 d-none">
+                <div class="alert alert-info">
+                    <h5>Scraping in Progress</h5>
+                    <p>Your Google Display Ads scraping task has been started. This may take a few minutes.</p>
+                    <p>Task ID: <span id="task-id"></span></p>
+                    <p>You can view results once the task is complete.</p>
+                    <a href="{{ url_for('google_ads.view_results') }}?type=display" class="btn btn-info">View Results</a>
+                </div>
+            </div>
+        </div>
+    </div>
+    <div class="card mt-4">
+        <div class="card-header bg-info text-white">
+            <h5 class="card-title mb-0">Tips for Display Ad Scraping</h5>
+        </div>
+        <div class="card-body">
+            <ul>
+                <li>Choose websites that are known to display Google Ads.</li>
+                <li>News sites, blogs, and content-heavy websites often have more display ads.</li>
+                <li>Some websites may block automated scraping.</li>
+                <li>The tool looks for iframes with Google Ads signatures.</li>
+                <li>Not all ads may be captured due to dynamic loading or anti-scraping measures.</li>
+            </ul>
+        </div>
+    </div>
+</div>
+{% endblock %}
+{% block scripts %}
+<script>
+    $(document).ready(function() {
+        $('#display-form').on('submit', function(e) {
+            e.preventDefault();
+            const submitBtn = $('#submit-btn');
+            submitBtn.prop('disabled', true).html('<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Processing...');
+            $.ajax({
+                url: "{{ url_for('google_ads.display_ads') }}",
+                type: "POST",
+                data: $(this).serialize(),
+                success: function(response) {
+                    $('#result-container').removeClass('d-none');
+                    $('#task-id').text(response.task_id);
+                    submitBtn.prop('disabled', false).text('Start Scraping');
+                },
+                error: function(xhr) {
+                    alert('Error: ' + xhr.responseJSON.message);
+                    submitBtn.prop('disabled', false).text('Start Scraping');
+                }
+            });
+        });
+    });
+</script>
+{% endblock %}

app/templates/google_ads/index.html ADDED Viewed

	@@ -0,0 +1,49 @@

+{% extends "base.html" %}
+{% block title %}Google Ads Analytics{% endblock %}
+{% block content %}
+<div class="container mt-4">
+    <h1 class="mb-4">Google Ads Analytics</h1>
+    <div class="row">
+        <div class="col-md-6">
+            <div class="card mb-4">
+                <div class="card-header bg-primary text-white">
+                    <h5 class="card-title mb-0">Search Ads</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">Scrape and analyze Google Search Ads for specific keywords.</p>
+                    <a href="{{ url_for('google_ads.search_ads') }}" class="btn btn-primary">Search Ads</a>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-6">
+            <div class="card mb-4">
+                <div class="card-header bg-success text-white">
+                    <h5 class="card-title mb-0">Display Ads</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">Scrape and analyze Google Display Ads from specific websites.</p>
+                    <a href="{{ url_for('google_ads.display_ads') }}" class="btn btn-success">Display Ads</a>
+                </div>
+            </div>
+        </div>
+    </div>
+    <div class="row">
+        <div class="col-12">
+            <div class="card">
+                <div class="card-header bg-info text-white">
+                    <h5 class="card-title mb-0">View Results</h5>
+                </div>
+                <div class="card-body">
+                    <p class="card-text">View and analyze your collected Google Ads data.</p>
+                    <a href="{{ url_for('google_ads.view_results') }}" class="btn btn-info">View Results</a>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}

app/templates/google_ads/results.html ADDED Viewed

	@@ -0,0 +1,252 @@

+{% extends "base.html" %}
+{% block title %}Google Ads Results{% endblock %}
+{% block content %}
+<div class="container mt-4">
+    <nav aria-label="breadcrumb">
+        <ol class="breadcrumb">
+            <li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
+            <li class="breadcrumb-item active" aria-current="page">Results</li>
+        </ol>
+    </nav>
+    <h1 class="mb-4">Google Ads Results</h1>
+    <div class="card mb-4">
+        <div class="card-header bg-info text-white">
+            <h5 class="card-title mb-0">Filter Results</h5>
+        </div>
+        <div class="card-body">
+            <form method="get" class="row g-3">
+                <div class="col-md-4">
+                    <label for="type" class="form-label">Ad Type</label>
+                    <select class="form-select" id="type" name="type">
+                        <option value="all" {% if ad_type == 'all' %}selected{% endif %}>All Types</option>
+                        <option value="search" {% if ad_type == 'search' %}selected{% endif %}>Search Ads</option>
+                        <option value="display" {% if ad_type == 'display' %}selected{% endif %}>Display Ads</option>
+                    </select>
+                </div>
+                <div class="col-md-6">
+                    <label for="query" class="form-label">Search</label>
+                    <input type="text" class="form-control" id="query" name="query" value="{{ query }}"
+                           placeholder="Search in titles, descriptions, or keywords">
+                </div>
+                <div class="col-md-2 d-flex align-items-end">
+                    <button type="submit" class="btn btn-primary w-100">Filter</button>
+                </div>
+            </form>
+        </div>
+    </div>
+    {% if ads %}
+        <div class="card">
+            <div class="card-header bg-success text-white">
+                <h5 class="card-title mb-0">{{ ads|length }} Ads Found</h5>
+            </div>
+            <div class="card-body p-0">
+                <div class="table-responsive">
+                    <table class="table table-striped table-hover mb-0">
+                        <thead>
+                            <tr>
+                                <th>Type</th>
+                                <th>Title/Image</th>
+                                <th>Description</th>
+                                <th>URL</th>
+                                <th>Sentiment</th>
+                                <th>Date</th>
+                                <th>Actions</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                            {% for ad in ads %}
+                                <tr>
+                                    <td>
+                                        {% if ad.ad_type == 'search' %}
+                                            <span class="badge bg-primary">Search</span>
+                                        {% else %}
+                                            <span class="badge bg-success">Display</span>
+                                        {% endif %}
+                                    </td>
+                                    <td>
+                                        {% if ad.ad_type == 'search' %}
+                                            {{ ad.title }}
+                                        {% else %}
+                                            {% if ad.image_url %}
+                                                <img src="{{ ad.image_url }}" alt="Ad Image" style="max-width: 100px; max-height: 60px;">
+                                            {% else %}
+                                                <span class="text-muted">No image</span>
+                                            {% endif %}
+                                        {% endif %}
+                                    </td>
+                                    <td>
+                                        {% if ad.description %}
+                                            {{ ad.description|truncate(100) }}
+                                        {% else %}
+                                            <span class="text-muted">No description</span>
+                                        {% endif %}
+                                    </td>
+                                    <td>
+                                        {% if ad.target_url %}
+                                            <a href="{{ ad.target_url }}" target="_blank" rel="noopener noreferrer">
+                                                {{ ad.display_url or ad.target_url|truncate(30) }}
+                                            </a>
+                                        {% else %}
+                                            <span class="text-muted">No URL</span>
+                                        {% endif %}
+                                    </td>
+                                    <td>
+                                        {% if ad.sentiment %}
+                                            <span class="badge
+                                                {% if ad.sentiment.label == 'POSITIVE' %}bg-success
+                                                {% elif ad.sentiment.label == 'NEGATIVE' %}bg-danger
+                                                {% else %}bg-secondary{% endif %}">
+                                                {{ ad.sentiment.label }} ({{ (ad.sentiment.score * 100)|round(1) }}%)
+                                            </span>
+                                        {% else %}
+                                            <span class="text-muted">Not analyzed</span>
+                                        {% endif %}
+                                    </td>
+                                    <td>{{ ad.created_at.strftime('%Y-%m-%d %H:%M') }}</td>
+                                    <td>
+                                        <button class="btn btn-sm btn-info view-details"
+                                                data-id="{{ ad.id }}"
+                                                data-bs-toggle="modal"
+                                                data-bs-target="#adDetailsModal">
+                                            Details
+                                        </button>
+                                    </td>
+                                </tr>
+                            {% endfor %}
+                        </tbody>
+                    </table>
+                </div>
+            </div>
+        </div>
+    {% else %}
+        <div class="alert alert-info">
+            <h5>No ads found</h5>
+            <p>No Google Ads match your search criteria. Try changing your filters or scrape some ads first.</p>
+            <div class="mt-3">
+                <a href="{{ url_for('google_ads.search_ads') }}" class="btn btn-primary me-2">Scrape Search Ads</a>
+                <a href="{{ url_for('google_ads.display_ads') }}" class="btn btn-success">Scrape Display Ads</a>
+            </div>
+        </div>
+    {% endif %}
+</div>
+<!-- Ad Details Modal -->
+<div class="modal fade" id="adDetailsModal" tabindex="-1" aria-labelledby="adDetailsModalLabel" aria-hidden="true">
+    <div class="modal-dialog modal-lg">
+        <div class="modal-content">
+            <div class="modal-header">
+                <h5 class="modal-title" id="adDetailsModalLabel">Ad Details</h5>
+                <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+            </div>
+            <div class="modal-body">
+                <div id="adDetailsContent">
+                    <div class="text-center">
+                        <div class="spinner-border" role="status">
+                            <span class="visually-hidden">Loading...</span>
+                        </div>
+                    </div>
+                </div>
+            </div>
+            <div class="modal-footer">
+                <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+{% block scripts %}
+<script>
+    $(document).ready(function() {
+        $('.view-details').on('click', function() {
+            const adId = $(this).data('id');
+            // Clear previous content and show loading spinner
+            $('#adDetailsContent').html('<div class="text-center"><div class="spinner-border" role="status"><span class="visually-hidden">Loading...</span></div></div>');
+            // Fetch ad details
+            $.ajax({
+                url: "{{ url_for('google_ads.api_get_ads') }}?id=" + adId,
+                type: "GET",
+                success: function(response) {
+                    if (response && response.length > 0) {
+                        const ad = response[0];
+                        let content = '<div class="ad-details">';
+                        // Ad type badge
+                        content += '<div class="mb-3">';
+                        if (ad.ad_type === 'search') {
+                            content += '<span class="badge bg-primary">Search Ad</span>';
+                        } else {
+                            content += '<span class="badge bg-success">Display Ad</span>';
+                        }
+                        content += '</div>';
+                        // Title and description
+                        if (ad.title) {
+                            content += '<h4>' + ad.title + '</h4>';
+                        }
+                        if (ad.description) {
+                            content += '<p>' + ad.description + '</p>';
+                        }
+                        // Image for display ads
+                        if (ad.image_url) {
+                            content += '<div class="text-center mb-3"><img src="' + ad.image_url + '" alt="Ad Image" style="max-width: 100%;"></div>';
+                        }
+                        // URL
+                        if (ad.target_url) {
+                            content += '<div class="mb-3"><strong>URL:</strong> <a href="' + ad.target_url + '" target="_blank">' + (ad.display_url || ad.target_url) + '</a></div>';
+                        }
+                        // Search query for search ads
+                        if (ad.search_query) {
+                            content += '<div class="mb-3"><strong>Search Query:</strong> ' + ad.search_query + '</div>';
+                        }
+                        // Page URL for display ads
+                        if (ad.page_url) {
+                            content += '<div class="mb-3"><strong>Found on:</strong> <a href="' + ad.page_url + '" target="_blank">' + ad.page_url + '</a></div>';
+                        }
+                        // Position for search ads
+                        if (ad.position) {
+                            content += '<div class="mb-3"><strong>Position:</strong> ' + ad.position + '</div>';
+                        }
+                        // Sentiment analysis
+                        if (ad.sentiment) {
+                            let sentimentClass = 'bg-secondary';
+                            if (ad.sentiment.label === 'POSITIVE') sentimentClass = 'bg-success';
+                            if (ad.sentiment.label === 'NEGATIVE') sentimentClass = 'bg-danger';
+                            content += '<div class="mb-3"><strong>Sentiment:</strong> ';
+                            content += '<span class="badge ' + sentimentClass + '">' + ad.sentiment.label + ' (' + (ad.sentiment.score * 100).toFixed(1) + '%)</span></div>';
+                        }
+                        // Date
+                        if (ad.created_at) {
+                            content += '<div class="mb-3"><strong>Scraped on:</strong> ' + ad.created_at + '</div>';
+                        }
+                        content += '</div>';
+                        $('#adDetailsContent').html(content);
+                    } else {
+                        $('#adDetailsContent').html('<div class="alert alert-danger">Ad details not found</div>');
+                    }
+                },
+                error: function() {
+                    $('#adDetailsContent').html('<div class="alert alert-danger">Error loading ad details</div>');
+                }
+            });
+        });
+    });
+</script>
+{% endblock %}

app/templates/google_ads/search.html ADDED Viewed

	@@ -0,0 +1,80 @@

+{% extends "base.html" %}
+{% block title %}Google Search Ads Scraper{% endblock %}
+{% block content %}
+<div class="container mt-4">
+    <nav aria-label="breadcrumb">
+        <ol class="breadcrumb">
+            <li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
+            <li class="breadcrumb-item active" aria-current="page">Search Ads</li>
+        </ol>
+    </nav>
+    <h1 class="mb-4">Google Search Ads Scraper</h1>
+    <div class="card">
+        <div class="card-header bg-primary text-white">
+            <h5 class="card-title mb-0">Search for Google Ads</h5>
+        </div>
+        <div class="card-body">
+            <form id="search-form" method="post">
+                <div class="mb-3">
+                    <label for="query" class="form-label">Search Query</label>
+                    <input type="text" class="form-control" id="query" name="query" required
+                           placeholder="Enter a keyword or phrase (e.g., 'buy shoes online')">
+                    <div class="form-text">Enter a keyword that would trigger ads on Google Search.</div>
+                </div>
+                <div class="mb-3">
+                    <label for="num_pages" class="form-label">Number of Pages</label>
+                    <input type="number" class="form-control" id="num_pages" name="num_pages"
+                           value="3" min="1" max="10">
+                    <div class="form-text">How many search result pages to scrape (1-10).</div>
+                </div>
+                <button type="submit" class="btn btn-primary" id="submit-btn">Start Scraping</button>
+            </form>
+            <div id="result-container" class="mt-4 d-none">
+                <div class="alert alert-info">
+                    <h5>Scraping in Progress</h5>
+                    <p>Your Google Ads scraping task has been started. This may take a few minutes.</p>
+                    <p>Task ID: <span id="task-id"></span></p>
+                    <p>You can view results once the task is complete.</p>
+                    <a href="{{ url_for('google_ads.view_results') }}" class="btn btn-info">View Results</a>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+{% block scripts %}
+<script>
+    $(document).ready(function() {
+        $('#search-form').on('submit', function(e) {
+            e.preventDefault();
+            const submitBtn = $('#submit-btn');
+            submitBtn.prop('disabled', true).html('<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Processing...');
+            $.ajax({
+                url: "{{ url_for('google_ads.search_ads') }}",
+                type: "POST",
+                data: $(this).serialize(),
+                success: function(response) {
+                    $('#result-container').removeClass('d-none');
+                    $('#task-id').text(response.task_id);
+                    submitBtn.prop('disabled', false).text('Start Scraping');
+                },
+                error: function(xhr) {
+                    alert('Error: ' + xhr.responseJSON.message);
+                    submitBtn.prop('disabled', false).text('Start Scraping');
+                }
+            });
+        });
+    });
+</script>
+{% endblock %}

app/utils/decorators.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from functools import wraps
-from flask import redirect, url_for, flash
 from flask_login import current_user
 def admin_required(f):

 from functools import wraps
+from flask import redirect, url_for, flash, jsonify
 from flask_login import current_user
 def admin_required(f):

config.py CHANGED Viewed

@@ -1,20 +1,52 @@
 import os
 class Config:
-    SECRET_KEY = os.getenv('SECRET_KEY', 'your-secret-key')
     SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:////tmp/app.db')
     SQLALCHEMY_TRACK_MODIFICATIONS = False
     CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
     CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
-    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'your-openai-api-key')
-    INSTANCE_PATH = '/tmp/instance'  # Set a writable instance path
 class DevelopmentConfig(Config):
     DEBUG = True
 class ProductionConfig(Config):
     DEBUG = False
 class TestingConfig(Config):
     TESTING = True
-    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'

 import os
+from pathlib import Path
 class Config:
+    # Base settings shared by the environment-specific classes below. Each value is
+    # read from the environment when this class body executes, with a literal fallback.
+    # Use a default secret key for development, but warn about it
+    SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')
+    if SECRET_KEY == 'dev-secret-key-change-in-production':
+        print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")
     SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:////tmp/app.db')
     SQLALCHEMY_TRACK_MODIFICATIONS = False
     CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
     CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
+    # Use a mock API key for development if not provided
+    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
+    if OPENAI_API_KEY == 'sk-mock-key-for-development':
+        print("WARNING: Using mock OPENAI_API_KEY. API calls will fail in production.")
+    # Ensure instance path exists
+    INSTANCE_PATH = os.getenv('INSTANCE_PATH', '/tmp/instance')
+    # Side effect of defining the class: creates the directory and any missing parents.
+    Path(INSTANCE_PATH).mkdir(parents=True, exist_ok=True)
 class DevelopmentConfig(Config):
+    # Development settings: debug flag on, testing flag off.
     DEBUG = True
+    TESTING = False
 class ProductionConfig(Config):
     DEBUG = False
+    TESTING = False
+    # In production, we require real values for these settings
+    def __init__(self):
+        # Runs only when the class is instantiated (get_config() does this). Raises
+        # ValueError if either setting still holds the placeholder default from Config.
+        if self.SECRET_KEY == 'dev-secret-key-change-in-production':
+            raise ValueError("Production requires a real SECRET_KEY")
+        if self.OPENAI_API_KEY == 'sk-mock-key-for-development':
+            raise ValueError("Production requires a real OPENAI_API_KEY")
 class TestingConfig(Config):
+    # Test settings: testing and debug flags on, with an in-memory SQLite database URI.
     TESTING = True
+    DEBUG = True
+    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
+# Function to determine which config to use
+def get_config():
+    env = os.getenv('FLASK_ENV', 'development')
+    if env == 'production':
+        return ProductionConfig()
+    elif env == 'testing':
+        return TestingConfig()
+    else:
+        return DevelopmentConfig()

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE DELETED Viewed

@@ -1,20 +0,0 @@
-Copyright (c) 2017-2021 Ingy döt Net
-Copyright (c) 2006-2016 Kirill Simonov
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA DELETED Viewed

@@ -1,46 +0,0 @@
-Metadata-Version: 2.1
-Name: PyYAML
-Version: 6.0.2
-Summary: YAML parser and emitter for Python
-Home-page: https://pyyaml.org/
-Download-URL: https://pypi.org/project/PyYAML/
-Author: Kirill Simonov
-Author-email: xi@resolvent.net
-License: MIT
-Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
-Project-URL: CI, https://github.com/yaml/pyyaml/actions
-Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
-Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
-Project-URL: Source Code, https://github.com/yaml/pyyaml
-Platform: Any
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Cython
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Topic :: Text Processing :: Markup
-Requires-Python: >=3.8
-License-File: LICENSE
-YAML is a data serialization format designed for human readability
-and interaction with scripting languages.  PyYAML is a YAML parser
-and emitter for Python.
-PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
-support, capable extension API, and sensible error messages.  PyYAML
-supports standard YAML tags and provides Python-specific tags that
-allow to represent an arbitrary Python object.
-PyYAML is applicable for a broad range of tasks from complex
-configuration files to object serialization and persistence.

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD DELETED Viewed

@@ -1,43 +0,0 @@
-PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
-PyYAML-6.0.2.dist-info/METADATA,sha256=9lwXqTOrXPts-jI2Lo5UwuaAYo0hiRA0BZqjch0WjAk,2106
-PyYAML-6.0.2.dist-info/RECORD,,
-PyYAML-6.0.2.dist-info/WHEEL,sha256=c7SWG1_hRvc9HXHEkmWlTu1Jr4WpzRucfzqTP-_8q0s,102
-PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
-_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
-_yaml/__pycache__/__init__.cpython-312.pyc,,
-yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
-yaml/__pycache__/__init__.cpython-312.pyc,,
-yaml/__pycache__/composer.cpython-312.pyc,,
-yaml/__pycache__/constructor.cpython-312.pyc,,
-yaml/__pycache__/cyaml.cpython-312.pyc,,
-yaml/__pycache__/dumper.cpython-312.pyc,,
-yaml/__pycache__/emitter.cpython-312.pyc,,
-yaml/__pycache__/error.cpython-312.pyc,,
-yaml/__pycache__/events.cpython-312.pyc,,
-yaml/__pycache__/loader.cpython-312.pyc,,
-yaml/__pycache__/nodes.cpython-312.pyc,,
-yaml/__pycache__/parser.cpython-312.pyc,,
-yaml/__pycache__/reader.cpython-312.pyc,,
-yaml/__pycache__/representer.cpython-312.pyc,,
-yaml/__pycache__/resolver.cpython-312.pyc,,
-yaml/__pycache__/scanner.cpython-312.pyc,,
-yaml/__pycache__/serializer.cpython-312.pyc,,
-yaml/__pycache__/tokens.cpython-312.pyc,,
-yaml/_yaml.cp312-win_amd64.pyd,sha256=Bx7e_LEQx7cnd1_A9_nClp3X77g-_Lw1aoAAtYZbwWk,263680
-yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
-yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
-yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
-yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
-yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
-yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
-yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
-yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
-yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
-yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
-yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
-yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
-yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
-yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
-yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
-yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
-Root-Is-Purelib: false
-Tag: cp312-cp312-win_amd64

hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- _yaml
2	- yaml

hf_env/Lib/site-packages/__pycache__/typing_extensions.cpython-312.pyc DELETED Viewed

Binary file (139 kB)

hf_env/Lib/site-packages/_yaml/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-# This is a stub package designed to roughly emulate the _yaml
-# extension module, which previously existed as a standalone module
-# and has been moved into the `yaml` package namespace.
-# It does not perfectly mimic its old counterpart, but should get
-# close enough for anyone who's relying on it even when they shouldn't.
-import yaml
-# in some circumstances, the yaml module we imported may be from a different version, so we need
-# to tread carefully when poking at it here (it may not have the attributes we expect)
-if not getattr(yaml, '__with_libyaml__', False):
-    from sys import version_info
-    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
-    raise exc("No module named '_yaml'")
-else:
-    from yaml._yaml import *
-    import warnings
-    warnings.warn(
-        'The _yaml extension module is now located at yaml._yaml'
-        ' and its location is subject to change.  To use the'
-        ' LibYAML-based parser and emitter, import from `yaml`:'
-        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
-        DeprecationWarning
-    )
-    del warnings
-    # Don't `del yaml` here because yaml is actually an existing
-    # namespace member of _yaml.
-__name__ = '_yaml'
-# If the module is top-level (i.e. not a part of any specific package)
-# then the attribute should be set to ''.
-# https://docs.python.org/3.8/library/types.html
-__package__ = ''

hf_env/Lib/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc DELETED Viewed

Binary file (858 Bytes)

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE DELETED Viewed

@@ -1,20 +0,0 @@
-This package contains a modified version of ca-bundle.crt:
-ca-bundle.crt -- Bundle of CA Root Certificates
-This is a bundle of X.509 certificates of public Certificate Authorities
-(CA). These were automatically extracted from Mozilla's root certificates
-file (certdata.txt).  This file can be found in the mozilla source tree:
-https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
-It contains the certificates in PEM format and therefore
-can be directly used with curl / libcurl / php_curl, or with
-an Apache+mod_ssl webserver for SSL client authentication.
-Just configure this file as the SSLCACertificateFile.#
-***** BEGIN LICENSE BLOCK *****
-This Source Code Form is subject to the terms of the Mozilla Public License,
-v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
-one at http://mozilla.org/MPL/2.0/.
-***** END LICENSE BLOCK *****
-@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA DELETED Viewed

@@ -1,77 +0,0 @@
-Metadata-Version: 2.2
-Name: certifi
-Version: 2025.1.31
-Summary: Python package for providing Mozilla's CA Bundle.
-Home-page: https://github.com/certifi/python-certifi
-Author: Kenneth Reitz
-Author-email: me@kennethreitz.com
-License: MPL-2.0
-Project-URL: Source, https://github.com/certifi/python-certifi
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
-Classifier: Natural Language :: English
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.6
-License-File: LICENSE
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: home-page
-Dynamic: license
-Dynamic: project-url
-Dynamic: requires-python
-Dynamic: summary
-Certifi: Python SSL Certificates
-================================
-Certifi provides Mozilla's carefully curated collection of Root Certificates for
-validating the trustworthiness of SSL certificates while verifying the identity
-of TLS hosts. It has been extracted from the `Requests`_ project.
-Installation
-------------
-``certifi`` is available on PyPI. Simply install it with ``pip``::
-    $ pip install certifi
-Usage
------
-To reference the installed certificate authority (CA) bundle, you can use the
-built-in function::
-    >>> import certifi
-    >>> certifi.where()
-    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
-Or from the command line::
-    $ python -m certifi
-    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
-Enjoy!
-.. _`Requests`: https://requests.readthedocs.io/en/master/
-Addition/Removal of Certificates
---------------------------------
-Certifi does not support any addition/removal or other modification of the
-CA trust store content. This project is intended to provide a reliable and
-highly portable root of trust to python deployments. Look to upstream projects
-for methods to use alternate trust.

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-certifi-2025.1.31.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-certifi-2025.1.31.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
-certifi-2025.1.31.dist-info/METADATA,sha256=t5kcT5aGu0dQ6_psUNZYTqnC0uCRnponewm3uYjeHbg,2451
-certifi-2025.1.31.dist-info/RECORD,,
-certifi-2025.1.31.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-certifi-2025.1.31.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
-certifi/__init__.py,sha256=neIaAf7BM36ygmQCmy-ZsSyjnvjWghFeu13wwEAnjj0,94
-certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
-certifi/__pycache__/__init__.cpython-312.pyc,,
-certifi/__pycache__/__main__.cpython-312.pyc,,
-certifi/__pycache__/core.cpython-312.pyc,,
-certifi/cacert.pem,sha256=xVsh-Qf3-G1IrdCTVS-1ZRdJ_1-GBQjMu0I9bB-9gMc,297255
-certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
-certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
-Root-Is-Purelib: true
-Tag: py3-none-any

hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- certifi

hf_env/Lib/site-packages/certifi/__init__.py DELETED Viewed

@@ -1,4 +0,0 @@
-from .core import contents, where
-__all__ = ["contents", "where"]
-__version__ = "2025.01.31"

hf_env/Lib/site-packages/certifi/__main__.py DELETED Viewed

@@ -1,12 +0,0 @@
-import argparse
-from certifi import contents, where
-parser = argparse.ArgumentParser()
-parser.add_argument("-c", "--contents", action="store_true")
-args = parser.parse_args()
-if args.contents:
-    print(contents())
-else:
-    print(where())

hf_env/Lib/site-packages/certifi/__pycache__/__init__.cpython-312.pyc DELETED Viewed

Binary file (305 Bytes)

hf_env/Lib/site-packages/certifi/__pycache__/__main__.cpython-312.pyc DELETED Viewed

Binary file (620 Bytes)

hf_env/Lib/site-packages/certifi/__pycache__/core.cpython-312.pyc DELETED Viewed

Binary file (3.18 kB)

hf_env/Lib/site-packages/certifi/cacert.pem DELETED Viewed

The diff for this file is too large to render. See raw diff

hf_env/Lib/site-packages/certifi/core.py DELETED Viewed

@@ -1,114 +0,0 @@
-"""
-certifi.py
-~~~~~~~~~~
-This module returns the installation location of cacert.pem or its contents.
-"""
-import sys
-import atexit
-def exit_cacert_ctx() -> None:
-    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]
-if sys.version_info >= (3, 11):
-    from importlib.resources import as_file, files
-    _CACERT_CTX = None
-    _CACERT_PATH = None
-    def where() -> str:
-        # This is slightly terrible, but we want to delay extracting the file
-        # in cases where we're inside of a zipimport situation until someone
-        # actually calls where(), but we don't want to re-extract the file
-        # on every call of where(), so we'll do it once then store it in a
-        # global variable.
-        global _CACERT_CTX
-        global _CACERT_PATH
-        if _CACERT_PATH is None:
-            # This is slightly janky, the importlib.resources API wants you to
-            # manage the cleanup of this file, so it doesn't actually return a
-            # path, it returns a context manager that will give you the path
-            # when you enter it and will do any cleanup when you leave it. In
-            # the common case of not needing a temporary file, it will just
-            # return the file system location and the __exit__() is a no-op.
-            #
-            # We also have to hold onto the actual context manager, because
-            # it will do the cleanup whenever it gets garbage collected, so
-            # we will also store that at the global level as well.
-            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
-            _CACERT_PATH = str(_CACERT_CTX.__enter__())
-            atexit.register(exit_cacert_ctx)
-        return _CACERT_PATH
-    def contents() -> str:
-        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
-elif sys.version_info >= (3, 7):
-    from importlib.resources import path as get_path, read_text
-    _CACERT_CTX = None
-    _CACERT_PATH = None
-    def where() -> str:
-        # This is slightly terrible, but we want to delay extracting the
-        # file in cases where we're inside of a zipimport situation until
-        # someone actually calls where(), but we don't want to re-extract
-        # the file on every call of where(), so we'll do it once then store
-        # it in a global variable.
-        global _CACERT_CTX
-        global _CACERT_PATH
-        if _CACERT_PATH is None:
-            # This is slightly janky, the importlib.resources API wants you
-            # to manage the cleanup of this file, so it doesn't actually
-            # return a path, it returns a context manager that will give
-            # you the path when you enter it and will do any cleanup when
-            # you leave it. In the common case of not needing a temporary
-            # file, it will just return the file system location and the
-            # __exit__() is a no-op.
-            #
-            # We also have to hold onto the actual context manager, because
-            # it will do the cleanup whenever it gets garbage collected, so
-            # we will also store that at the global level as well.
-            _CACERT_CTX = get_path("certifi", "cacert.pem")
-            _CACERT_PATH = str(_CACERT_CTX.__enter__())
-            atexit.register(exit_cacert_ctx)
-        return _CACERT_PATH
-    def contents() -> str:
-        return read_text("certifi", "cacert.pem", encoding="ascii")
-else:
-    import os
-    import types
-    from typing import Union
-    Package = Union[types.ModuleType, str]
-    Resource = Union[str, "os.PathLike"]
-    # This fallback will work for Python versions prior to 3.7 that lack the
-    # importlib.resources module but relies on the existing `where` function
-    # so won't address issues with environments like PyOxidizer that don't set
-    # __file__ on modules.
-    def read_text(
-        package: Package,
-        resource: Resource,
-        encoding: str = 'utf-8',
-        errors: str = 'strict'
-    ) -> str:
-        with open(where(), encoding=encoding) as data:
-            return data.read()
-    # If we don't have importlib.resources, then we will just do the old logic
-    # of assuming we're on the filesystem and munge the path directly.
-    def where() -> str:
-        f = os.path.dirname(__file__)
-        return os.path.join(f, "cacert.pem")
-    def contents() -> str:
-        return read_text("certifi", "cacert.pem", encoding="ascii")

hf_env/Lib/site-packages/certifi/py.typed DELETED Viewed

File without changes

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2025 TAHRI Ahmed R.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA DELETED Viewed

@@ -1,721 +0,0 @@
-Metadata-Version: 2.1
-Name: charset-normalizer
-Version: 3.4.1
-Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
-Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
-Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
-License: MIT
-Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
-Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
-Project-URL: Code, https://github.com/jawah/charset_normalizer
-Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
-Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Topic :: Text Processing :: Linguistic
-Classifier: Topic :: Utilities
-Classifier: Typing :: Typed
-Requires-Python: >=3.7
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Provides-Extra: unicode-backport
-<h1 align="center">Charset Detection, for Everyone 👋</h1>
-<p align="center">
-  <sup>The Real First Universal Charset Detector</sup><br>
-  <a href="https://pypi.org/project/charset-normalizer">
-    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
-  </a>
-  <a href="https://pepy.tech/project/charset-normalizer/">
-    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
-  </a>
-  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
-    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
-  </a>
-</p>
-<p align="center">
-  <sup><i>Featured Packages</i></sup><br>
-  <a href="https://github.com/jawah/niquests">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Best_HTTP_Client-cyan">
-  </a>
-  <a href="https://github.com/jawah/wassima">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
-  </a>
-</p>
-<p align="center">
-  <sup><i>In other language (unofficial port - by the community)</i></sup><br>
-  <a href="https://github.com/nickspring/charset-normalizer-rs">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
-  </a>
-</p>
-> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
-> I'm trying to resolve the issue by taking a new approach.
-> All IANA character set names for which the Python core library provides codecs are supported.
-<p align="center">
-  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
-</p>
-This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
-| Feature                                          | [Chardet](https://github.com/chardet/chardet) |                                         Charset Normalizer                                         | [cChardet](https://github.com/PyYoshi/cChardet) |
-|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
-| `Fast`                                           |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
-| `Universal**`                                    |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
-| `Reliable` **without** distinguishable standards |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
-| `Reliable` **with** distinguishable standards    |                       ✅                       |                                                 ✅                                                  |                        ✅                        |
-| `License`                                        |           LGPL-2.1<br>_restrictive_           |                                                MIT                                                 |            MPL-1.1<br>_restrictive_             |
-| `Native Python`                                  |                       ✅                       |                                                 ✅                                                  |                        ❌                        |
-| `Detect spoken language`                         |                       ❌                       |                                                 ✅                                                  |                       N/A                       |
-| `UnicodeDecodeError Safety`                      |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
-| `Whl Size (min)`                                 |                   193.6 kB                    |                                               42 kB                                                |                     ~200 kB                     |
-| `Supported Encoding`                             |                      33                       | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) |                       40                        |
-<p align="center">
-<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
-</p>
-*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
-## ⚡ Performance
-This package offers better performance than its counterpart Chardet. Here are some numbers.
-| Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
-|-----------------------------------------------|:--------:|:------------------:|:------------------:|
-| [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
-| charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |
-| Package                                       | 99th percentile | 95th percentile | 50th percentile |
-|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
-| [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
-| charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |
-_updated as of december 2024 using CPython 3.12_
-Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
-> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
-> And yes, these results might change at any time. The dataset can be updated to include more files.
-> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
-> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
-> (e.g. Supported Encoding). Challenge them if you want.
-## ✨ Installation
-Using pip:
-```sh
-pip install charset-normalizer -U
-```
-## 🚀 Basic Usage
-### CLI
-This package comes with a CLI.
-```
-usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
-                  file [file ...]
-The Real First Universal Charset Detector. Discover originating encoding used
-on text file. Normalize text to unicode.
-positional arguments:
-  files                 File(s) to be analysed
-optional arguments:
-  -h, --help            show this help message and exit
-  -v, --verbose         Display complementary information about file if any.
-                        Stdout will contain logs about the detection process.
-  -a, --with-alternative
-                        Output complementary possibilities if any. Top-level
-                        JSON WILL be a list.
-  -n, --normalize       Permit to normalize input file. If not set, program
-                        does not write anything.
-  -m, --minimal         Only output the charset detected to STDOUT. Disabling
-                        JSON output.
-  -r, --replace         Replace file when trying to normalize it instead of
-                        creating a new one.
-  -f, --force           Replace file without asking if you are sure, use this
-                        flag with caution.
-  -t THRESHOLD, --threshold THRESHOLD
-                        Define a custom maximum amount of chaos allowed in
-                        decoded content. 0. <= chaos <= 1.
-  --version             Show version information and exit.
-```
-```bash
-normalizer ./data/sample.1.fr.srt
-```
-or
-```bash
-python -m charset_normalizer ./data/sample.1.fr.srt
-```
-🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
-```json
-{
-    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
-    "encoding": "cp1252",
-    "encoding_aliases": [
-        "1252",
-        "windows_1252"
-    ],
-    "alternative_encodings": [
-        "cp1254",
-        "cp1256",
-        "cp1258",
-        "iso8859_14",
-        "iso8859_15",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_9",
-        "latin_1",
-        "mbcs"
-    ],
-    "language": "French",
-    "alphabets": [
-        "Basic Latin",
-        "Latin-1 Supplement"
-    ],
-    "has_sig_or_bom": false,
-    "chaos": 0.149,
-    "coherence": 97.152,
-    "unicode_path": null,
-    "is_preferred": true
-}
-```
-### Python
-*Just print out normalized text*
-```python
-from charset_normalizer import from_path
-results = from_path('./my_subtitle.srt')
-print(str(results.best()))
-```
-*Upgrade your code without effort*
-```python
-from charset_normalizer import detect
-```
-The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
-See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
-## 😇 Why
-When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
-reliable alternative using a completely different method. Also! I never back down on a good challenge!
-I **don't care** about the **originating charset** encoding, because **two different tables** can
-produce **two identical rendered strings.**
-What I want is to get readable text, the best I can.
-In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
-Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's goal is to convert raw files in an unknown encoding to Unicode.
-## 🍰 How
-  - Discard all charset encoding table that could not fit the binary content.
-  - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
-  - Extract matches with the lowest mess detected.
-  - Additionally, we measure coherence / probe for a language.
-**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
-*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
-**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
- I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
- improve or rewrite it.
-*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
-that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
-## ⚡ Known limitations
-  - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
-  - Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
-## ⚠️ About Python EOLs
-**If you are running:**
-- Python >=2.7,<3.5: Unsupported
-- Python 3.5: charset-normalizer < 2.1
-- Python 3.6: charset-normalizer < 3.1
-- Python 3.7: charset-normalizer < 4.0
-Upgrade your Python interpreter as soon as possible.
-## 👤 Contributing
-Contributions, issues and feature requests are very much welcome.<br />
-Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
-## 📝 License
-Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
-This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
-Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
-## 💼 For Enterprise
-Professional support for charset-normalizer is available as part of the [Tidelift
-Subscription][1]. Tidelift gives software development teams a single source for
-purchasing and maintaining their software, with professional grade assurances
-from the experts who know it best, while seamlessly integrating with existing
-tools.
-[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
-[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
-# Changelog
-All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
-## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
-### Changed
-- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
-- Enforce annotation delayed loading for a simpler and consistent types in the project.
-- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
-### Added
-- pre-commit configuration.
-- noxfile.
-### Removed
-- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
-- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
-- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
-- Unused `utils.range_scan` function.
-### Fixed
-- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
-- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
-## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
-### Added
-- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
-- Support for Python 3.13 (#512)
-### Fixed
-- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
-- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
-- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
-## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
-### Fixed
-- Unintentional memory usage regression when using large payload that match several encoding (#376)
-- Regression on some detection case showcased in the documentation (#371)
-### Added
-- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
-## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
-### Changed
-- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
-- Improved the general detection reliability based on reports from the community
-## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
-### Added
-- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
-- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
-### Removed
-- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
-- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
-### Changed
-- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
-- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
-### Fixed
-- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
-## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
-### Changed
-- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
-- Minor improvement over the global detection reliability
-### Added
-- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
-- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
-- Explicit support for Python 3.12
-### Fixed
-- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
-## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
-### Added
-- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
-### Removed
-- Support for Python 3.6 (PR #260)
-### Changed
-- Optional speedup provided by mypy/c 1.0.1
-## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
-### Fixed
-- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
-### Changed
-- Speedup provided by mypy/c 0.990 on Python >= 3.7
-## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
-### Added
-- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
-- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
-- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
-- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
-### Changed
-- Build with static metadata using 'build' frontend
-- Make the language detection stricter
-- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
-### Fixed
-- CLI with opt --normalize fail when using full path for files
-- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
-- Sphinx warnings when generating the documentation
-### Removed
-- Coherence detector no longer return 'Simple English' instead return 'English'
-- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
-- Breaking: Method `first()` and `best()` from CharsetMatch
-- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
-- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
-- Breaking: Top-level function `normalize`
-- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
-- Support for the backport `unicodedata2`
-## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
-### Added
-- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
-- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
-- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
-### Changed
-- Build with static metadata using 'build' frontend
-- Make the language detection stricter
-### Fixed
-- CLI with opt --normalize fail when using full path for files
-- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
-### Removed
-- Coherence detector no longer return 'Simple English' instead return 'English'
-- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
-## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
-### Added
-- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
-### Removed
-- Breaking: Method `first()` and `best()` from CharsetMatch
-- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
-### Fixed
-- Sphinx warnings when generating the documentation
-## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
-### Changed
-- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
-### Removed
-- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
-- Breaking: Top-level function `normalize`
-- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
-- Support for the backport `unicodedata2`
-## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
-### Deprecated
-- Function `normalize` scheduled for removal in 3.0
-### Changed
-- Removed useless call to decode in fn is_unprintable (#206)
-### Fixed
-- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
-## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
-### Added
-- Output the Unicode table version when running the CLI with `--version` (PR #194)
-### Changed
-- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
-- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
-### Fixed
-- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
-- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
-### Removed
-- Support for Python 3.5 (PR #192)
-### Deprecated
-- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
-## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
-### Fixed
-- ASCII miss-detection on rare cases (PR #170)
-## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
-### Added
-- Explicit support for Python 3.11 (PR #164)
-### Changed
-- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
-## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
-### Fixed
-- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
-### Changed
-- Skipping the language-detection (CD) on ASCII (PR #155)
-## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
-### Changed
-- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
-### Fixed
-- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
-## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
-### Changed
-- Improvement over Vietnamese detection (PR #126)
-- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
-- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
-- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
-- Code style as refactored by Sourcery-AI (PR #131)
-- Minor adjustment on the MD around european words (PR #133)
-- Remove and replace SRTs from assets / tests (PR #139)
-- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
-- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
-### Fixed
-- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
-- Avoid using too insignificant chunk (PR #137)
-### Added
-- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
-- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
-## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
-### Added
-- Add support for Kazakh (Cyrillic) language detection (PR #109)
-### Changed
-- Further, improve inferring the language from a given single-byte code page (PR #112)
-- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
-- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
-- Various detection improvement (MD+CD) (PR #117)
-### Removed
-- Remove redundant logging entry about detected language(s) (PR #115)
-### Fixed
-- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
-## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
-### Fixed
-- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
-- Fix CLI crash when using --minimal output in certain cases (PR #103)
-### Changed
-- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
-## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
-### Changed
-- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
-- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
-- The Unicode detection is slightly improved (PR #93)
-- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
-### Removed
-- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
-### Fixed
-- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
-- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
-- The MANIFEST.in was not exhaustive (PR #78)
-## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
-### Fixed
-- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
-- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
-- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
-- Submatch factoring could be wrong in rare edge cases (PR #72)
-- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
-- Fix line endings from CRLF to LF for certain project files (PR #67)
-### Changed
-- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
-- Allow fallback on specified encoding if any (PR #71)
-## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
-### Changed
-- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
-- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
-## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
-### Fixed
-- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
-### Changed
-- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
-## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
-### Fixed
-- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
-- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
-- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
-- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
-### Changed
-- Public function normalize default args values were not aligned with from_bytes (PR #53)
-### Added
-- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
-## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
-### Changed
-- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
-- Accent has been made on UTF-8 detection, should perform rather instantaneous.
-- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
-- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
-- The program has been rewritten to ease the readability and maintainability. (+Using static typing)
-- utf_7 detection has been reinstated.
-### Removed
-- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
-- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
-- The exception hook on UnicodeDecodeError has been removed.
-### Deprecated
-- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
-### Fixed
-- The CLI output used the relative path of the file(s). Should be absolute.
-## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
-### Fixed
-- Logger configuration/usage no longer conflict with others (PR #44)
-## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
-### Removed
-- Using standard logging instead of using the package loguru.
-- Dropping nose test framework in favor of the maintained pytest.
-- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
-- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
-- Stop support for UTF-7 that does not contain a SIG.
-- Dropping PrettyTable, replaced with pure JSON output in CLI.
-### Fixed
-- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
-- Not searching properly for the BOM when trying utf32/16 parent codec.
-### Changed
-- Improving the package final size by compressing frequencies.json.
-- Huge improvement over the largest payloads.
-### Added
-- CLI now produces JSON consumable output.
-- Return ASCII if given sequences fit. Given reasonable confidence.
-## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
-### Fixed
-- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
-## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
-### Fixed
-- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
-## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
-### Fixed
-- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
-## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
-### Changed
-- Amend the previous release to allow prettytable 2.0 (PR #35)
-## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
-### Fixed
-- Fix error while using the package with a python pre-release interpreter (PR #33)
-### Changed
-- Dependencies refactoring, constraints revised.
-### Added
-- Add python 3.9 and 3.10 to the supported interpreters
-MIT License
-Copyright (c) 2025 TAHRI Ahmed R.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD DELETED Viewed

@@ -1,35 +0,0 @@
-../../Scripts/normalizer.exe,sha256=rjmQpEaKnhN2uxmQpPX5uvykP-ehMGTO6RzOXLN_RJY,108424
-charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-charset_normalizer-3.4.1.dist-info/LICENSE,sha256=GFd0hdNwTxpHne2OVzwJds_tMV_S_ReYP6mI2kwvcNE,1092
-charset_normalizer-3.4.1.dist-info/METADATA,sha256=0_fAC3DknimRZusm6kkP4ylPD0JVzBq5mKHWLNBJM6w,36034
-charset_normalizer-3.4.1.dist-info/RECORD,,
-charset_normalizer-3.4.1.dist-info/WHEEL,sha256=pWXrJbnZSH-J-PhYmKs2XNn4DHCPNBYq965vsBJBFvA,101
-charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65
-charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
-charset_normalizer/__init__.py,sha256=0NT8MHi7SKq3juMqYfOdrkzjisK0L73lneNHH4qaUAs,1638
-charset_normalizer/__main__.py,sha256=2sj_BS6H0sU25C1bMqz9DVwa6kOK9lchSEbSU-_iu7M,115
-charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
-charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
-charset_normalizer/__pycache__/api.cpython-312.pyc,,
-charset_normalizer/__pycache__/cd.cpython-312.pyc,,
-charset_normalizer/__pycache__/constant.cpython-312.pyc,,
-charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
-charset_normalizer/__pycache__/md.cpython-312.pyc,,
-charset_normalizer/__pycache__/models.cpython-312.pyc,,
-charset_normalizer/__pycache__/utils.cpython-312.pyc,,
-charset_normalizer/__pycache__/version.cpython-312.pyc,,
-charset_normalizer/api.py,sha256=2a0p2Gnhbdo9O6C04CNxTSN23fIbgOF20nxb0pWPNFM,23285
-charset_normalizer/cd.py,sha256=uq8nVxRpR6Guc16ACvOWtL8KO3w7vYaCh8hHisuOyTg,12917
-charset_normalizer/cli/__init__.py,sha256=d9MUx-1V_qD3x9igIy4JT4oC5CU0yjulk7QyZWeRFhg,144
-charset_normalizer/cli/__main__.py,sha256=lZ89qRWun7FRxX0qm1GhK-m0DH0i048yiMAX1mVIuRg,10731
-charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
-charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
-charset_normalizer/constant.py,sha256=7OKYi28cJjZxIcX3lQCwfK9ijoOgaVEbERww7SqqNSY,42475
-charset_normalizer/legacy.py,sha256=v8An1aAQHUu036UWOhyIaDGkirZ0t4hfNVlyje5KInU,2394
-charset_normalizer/md.cp312-win_amd64.pyd,sha256=XBGy--IKda7c3iBfvw_dovocqb2RSucmVtxvtlG_3tA,10752
-charset_normalizer/md.py,sha256=e452fhwIAguEUr3FJzG7QZvFgXI-dVLOh_M1ZUiFI6U,20666
-charset_normalizer/md__mypyc.cp312-win_amd64.pyd,sha256=_-jWSji0BgBVvrIHbmabYQNMBF4-xTusdO5mu6P8JsA,125440
-charset_normalizer/models.py,sha256=ZR2PE-fqf6dASZfqdE5Uhkmr0o1MciSdXOjuNqwkmvg,12754
-charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-charset_normalizer/utils.py,sha256=oH9Q3WcAMwmsSB7uM8uDozz9DXnkYecbkTNbdnMbgzI,12410
-charset_normalizer/version.py,sha256=7_thI7FzRQxEsbtUYwrJs3FCFWF666mw74H8mggPRR0,123

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
-Root-Is-Purelib: false
-Tag: cp312-cp312-win_amd64

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- [console_scripts]
2	- normalizer = charset_normalizer:cli.cli_detect

hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- charset_normalizer

hf_env/Lib/site-packages/charset_normalizer/__init__.py DELETED Viewed

@@ -1,48 +0,0 @@
-"""
-Charset-Normalizer
-~~~~~~~~~~~~~~
-The Real First Universal Charset Detector.
-A library that helps you read text from an unknown charset encoding.
-Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
-All IANA character set names for which the Python core library provides codecs are supported.
-Basic usage:
-   >>> from charset_normalizer import from_bytes
-   >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
-   >>> best_guess = results.best()
-   >>> str(best_guess)
-   'Bсеки човек има право на образование. Oбразованието!'
-Others methods and usages are available - see the full documentation
-at <https://github.com/Ousret/charset_normalizer>.
-:copyright: (c) 2021 by Ahmed TAHRI
-:license: MIT, see LICENSE for more details.
-"""
-from __future__ import annotations
-import logging
-from .api import from_bytes, from_fp, from_path, is_binary
-from .legacy import detect
-from .models import CharsetMatch, CharsetMatches
-from .utils import set_logging_handler
-from .version import VERSION, __version__
-__all__ = (
-    "from_fp",
-    "from_path",
-    "from_bytes",
-    "is_binary",
-    "detect",
-    "CharsetMatch",
-    "CharsetMatches",
-    "__version__",
-    "VERSION",
-    "set_logging_handler",
-)
-# Attach a NullHandler to the top level logger by default
-# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
-logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

hf_env/Lib/site-packages/charset_normalizer/__main__.py DELETED Viewed

@@ -1,6 +0,0 @@
-from __future__ import annotations
-from .cli import cli_detect
-if __name__ == "__main__":
-    cli_detect()