Saving local changes before rebase
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .env +9 -0
- .env.example +5 -2
- app.py +12 -2
- app/__init__.py +34 -16
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/models/google_ad.py +51 -0
- app/routes/compliance.py +8 -8
- app/routes/google_ads.py +188 -0
- app/services/ai_processor.py +9 -2
- app/services/google_scraper.py +172 -0
- app/templates/base.html +43 -11
- app/templates/dashboard.html +85 -21
- app/templates/google_ads/display.html +95 -0
- app/templates/google_ads/index.html +49 -0
- app/templates/google_ads/results.html +252 -0
- app/templates/google_ads/search.html +80 -0
- app/utils/decorators.py +1 -1
- config.py +36 -4
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER +0 -1
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE +0 -20
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA +0 -46
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD +0 -43
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL +0 -5
- hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt +0 -2
- hf_env/Lib/site-packages/__pycache__/typing_extensions.cpython-312.pyc +0 -0
- hf_env/Lib/site-packages/_yaml/__init__.py +0 -33
- hf_env/Lib/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc +0 -0
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER +0 -1
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE +0 -20
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA +0 -77
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD +0 -14
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL +0 -5
- hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt +0 -1
- hf_env/Lib/site-packages/certifi/__init__.py +0 -4
- hf_env/Lib/site-packages/certifi/__main__.py +0 -12
- hf_env/Lib/site-packages/certifi/__pycache__/__init__.cpython-312.pyc +0 -0
- hf_env/Lib/site-packages/certifi/__pycache__/__main__.cpython-312.pyc +0 -0
- hf_env/Lib/site-packages/certifi/__pycache__/core.cpython-312.pyc +0 -0
- hf_env/Lib/site-packages/certifi/cacert.pem +0 -0
- hf_env/Lib/site-packages/certifi/core.py +0 -114
- hf_env/Lib/site-packages/certifi/py.typed +0 -0
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER +0 -1
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE +0 -21
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA +0 -721
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD +0 -35
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL +0 -5
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt +0 -2
- hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt +0 -1
- hf_env/Lib/site-packages/charset_normalizer/__init__.py +0 -48
- hf_env/Lib/site-packages/charset_normalizer/__main__.py +0 -6
.env
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FLASK_APP=app.py
|
| 2 |
+
FLASK_ENV=development
|
| 3 |
+
SECRET_KEY=your-secret-key-here
|
| 4 |
+
DATABASE_URL=postgresql://user:password@localhost:5432/facebook_ads
|
| 5 |
+
CELERY_BROKER_URL=redis://localhost:6379/0
|
| 6 |
+
CELERY_RESULT_BACKEND=redis://localhost:6379/0
|
| 7 |
+
OPENAI_API_KEY=your-openai-api-key-here
|
| 8 |
+
INSTANCE_PATH=/tmp/instance
|
| 9 |
+
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
|
.env.example
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
FLASK_APP=app.py
|
| 2 |
FLASK_ENV=development
|
| 3 |
SECRET_KEY=your-secret-key-here
|
| 4 |
-
DATABASE_URL=postgresql://user:password@localhost:5432/
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
|
|
|
|
| 1 |
FLASK_APP=app.py
|
| 2 |
FLASK_ENV=development
|
| 3 |
SECRET_KEY=your-secret-key-here
|
| 4 |
+
DATABASE_URL=postgresql://user:password@localhost:5432/facebook_ads
|
| 5 |
+
CELERY_BROKER_URL=redis://localhost:6379/0
|
| 6 |
+
CELERY_RESULT_BACKEND=redis://localhost:6379/0
|
| 7 |
+
OPENAI_API_KEY=your-openai-api-key-here
|
| 8 |
+
INSTANCE_PATH=/tmp/instance
|
| 9 |
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
|
app.py
CHANGED
|
@@ -1,6 +1,16 @@
|
|
| 1 |
from flask import Flask
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
def create_app():
|
| 4 |
-
app =
|
| 5 |
-
app.config
|
|
|
|
| 6 |
return app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from flask import Flask
|
| 2 |
+
from flask_migrate import Migrate
|
| 3 |
+
from app import db, create_app as create_flask_app
|
| 4 |
+
from config import get_config
|
| 5 |
+
|
| 6 |
+
migrate = Migrate()
|
| 7 |
|
| 8 |
def create_app():
|
| 9 |
+
app = create_flask_app()
|
| 10 |
+
app.config.from_object(get_config())
|
| 11 |
+
migrate.init_app(app, db)
|
| 12 |
return app
|
| 13 |
+
|
| 14 |
+
if __name__ == "__main__":
|
| 15 |
+
app = create_app()
|
| 16 |
+
app.run(debug=True)
|
app/__init__.py
CHANGED
|
@@ -3,35 +3,53 @@ from flask_sqlalchemy import SQLAlchemy
|
|
| 3 |
from flask_login import LoginManager
|
| 4 |
from celery import Celery
|
| 5 |
import redis
|
|
|
|
|
|
|
| 6 |
|
|
|
|
| 7 |
db = SQLAlchemy()
|
| 8 |
login = LoginManager()
|
|
|
|
| 9 |
celery = Celery(__name__)
|
| 10 |
-
cache =
|
| 11 |
|
| 12 |
-
def create_app():
|
| 13 |
-
# Create the Flask app
|
| 14 |
app = Flask(__name__)
|
| 15 |
-
|
| 16 |
# Load configuration
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
app.instance_path = app.config['INSTANCE_PATH']
|
| 21 |
-
|
| 22 |
# Initialize extensions
|
| 23 |
db.init_app(app)
|
| 24 |
login.init_app(app)
|
|
|
|
|
|
|
| 25 |
celery.conf.update(app.config)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Register Blueprints
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
return app
|
|
|
|
| 3 |
from flask_login import LoginManager
|
| 4 |
from celery import Celery
|
| 5 |
import redis
|
| 6 |
+
import os
|
| 7 |
+
from pathlib import Path
|
| 8 |
|
| 9 |
+
# Initialize extensions
|
| 10 |
db = SQLAlchemy()
|
| 11 |
login = LoginManager()
|
| 12 |
+
login.login_view = 'auth.login'
|
| 13 |
celery = Celery(__name__)
|
| 14 |
+
cache = None # Initialize later when app context is available
|
| 15 |
|
| 16 |
+
def create_app(config_class=None):
|
| 17 |
+
# Create the Flask app
|
| 18 |
app = Flask(__name__)
|
| 19 |
+
|
| 20 |
# Load configuration
|
| 21 |
+
if config_class is None:
|
| 22 |
+
app.config.from_object('config.Config')
|
| 23 |
+
else:
|
| 24 |
+
app.config.from_object(config_class)
|
| 25 |
+
|
| 26 |
+
# Ensure instance path exists
|
| 27 |
+
Path(app.config['INSTANCE_PATH']).mkdir(parents=True, exist_ok=True)
|
| 28 |
app.instance_path = app.config['INSTANCE_PATH']
|
| 29 |
+
|
| 30 |
# Initialize extensions
|
| 31 |
db.init_app(app)
|
| 32 |
login.init_app(app)
|
| 33 |
+
|
| 34 |
+
# Configure Celery
|
| 35 |
celery.conf.update(app.config)
|
| 36 |
|
| 37 |
+
# Initialize Redis cache
|
| 38 |
+
global cache
|
| 39 |
+
cache = redis.Redis.from_url(app.config['CELERY_BROKER_URL'])
|
| 40 |
+
|
| 41 |
# Register Blueprints
|
| 42 |
+
with app.app_context():
|
| 43 |
+
from .routes.auth import auth_bp
|
| 44 |
+
from .routes.dashboard import dashboard_bp
|
| 45 |
+
from .routes.api import api_bp
|
| 46 |
+
from .routes.compliance import compliance_bp
|
| 47 |
+
from .routes.google_ads import google_ads_bp
|
| 48 |
+
|
| 49 |
+
app.register_blueprint(auth_bp)
|
| 50 |
+
app.register_blueprint(dashboard_bp)
|
| 51 |
+
app.register_blueprint(api_bp)
|
| 52 |
+
app.register_blueprint(compliance_bp)
|
| 53 |
+
app.register_blueprint(google_ads_bp)
|
| 54 |
|
| 55 |
return app
|
app/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/app/__pycache__/__init__.cpython-312.pyc and b/app/__pycache__/__init__.cpython-312.pyc differ
|
|
|
app/models/google_ad.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app import db
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
import uuid
|
| 4 |
+
|
| 5 |
+
class GoogleAd(db.Model):
|
| 6 |
+
"""Model for storing Google Ads data."""
|
| 7 |
+
id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
|
| 8 |
+
ad_type = db.Column(db.String(20), nullable=False) # 'search' or 'display'
|
| 9 |
+
title = db.Column(db.String(255), nullable=True)
|
| 10 |
+
description = db.Column(db.Text, nullable=True)
|
| 11 |
+
display_url = db.Column(db.String(255), nullable=True)
|
| 12 |
+
target_url = db.Column(db.String(512), nullable=True)
|
| 13 |
+
image_url = db.Column(db.String(512), nullable=True)
|
| 14 |
+
position = db.Column(db.Integer, nullable=True)
|
| 15 |
+
search_query = db.Column(db.String(255), nullable=True)
|
| 16 |
+
page_url = db.Column(db.String(512), nullable=True)
|
| 17 |
+
raw_data = db.Column(db.JSON, nullable=True)
|
| 18 |
+
sentiment = db.Column(db.JSON, nullable=True)
|
| 19 |
+
created_at = db.Column(db.DateTime, default=datetime.utcnow)
|
| 20 |
+
updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
| 21 |
+
user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
|
| 22 |
+
|
| 23 |
+
def __repr__(self):
|
| 24 |
+
return f'<GoogleAd {self.id} - {self.title}>'
|
| 25 |
+
|
| 26 |
+
@classmethod
|
| 27 |
+
def from_search_ad_data(cls, ad_data, search_query, user_id=None):
|
| 28 |
+
"""Create a GoogleAd instance from scraped search ad data."""
|
| 29 |
+
return cls(
|
| 30 |
+
ad_type='search',
|
| 31 |
+
title=ad_data.get('title'),
|
| 32 |
+
description=ad_data.get('description'),
|
| 33 |
+
display_url=ad_data.get('display_url'),
|
| 34 |
+
target_url=ad_data.get('target_url'),
|
| 35 |
+
position=ad_data.get('position'),
|
| 36 |
+
search_query=search_query,
|
| 37 |
+
raw_data=ad_data,
|
| 38 |
+
user_id=user_id
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
@classmethod
|
| 42 |
+
def from_display_ad_data(cls, ad_data, user_id=None):
|
| 43 |
+
"""Create a GoogleAd instance from scraped display ad data."""
|
| 44 |
+
return cls(
|
| 45 |
+
ad_type='display',
|
| 46 |
+
image_url=ad_data.get('image_url'),
|
| 47 |
+
target_url=ad_data.get('target_url'),
|
| 48 |
+
page_url=ad_data.get('page_url'),
|
| 49 |
+
raw_data=ad_data,
|
| 50 |
+
user_id=user_id
|
| 51 |
+
)
|
app/routes/compliance.py
CHANGED
|
@@ -3,15 +3,11 @@ from flask_login import login_required
|
|
| 3 |
from ..models import Ad
|
| 4 |
from ..utils.decorators import admin_required
|
| 5 |
from .. import db
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
-
@login_required
|
| 11 |
-
@admin_required
|
| 12 |
-
def compliance_report():
|
| 13 |
-
ads = Ad.query.all()
|
| 14 |
-
return render_template('compliance_report.html', ads=ads)
|
| 15 |
|
| 16 |
@compliance_bp.route('/anonymize/<ad_id>', methods=['POST'])
|
| 17 |
@login_required
|
|
@@ -20,8 +16,12 @@ def anonymize_ad(ad_id):
|
|
| 20 |
try:
|
| 21 |
ad = Ad.query.get_or_404(ad_id)
|
| 22 |
ad.content = "REDACTED"
|
|
|
|
| 23 |
db.session.commit()
|
| 24 |
return jsonify({'status': 'success'})
|
| 25 |
except Exception as e:
|
| 26 |
db.session.rollback()
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from ..models import Ad
|
| 4 |
from ..utils.decorators import admin_required
|
| 5 |
from .. import db
|
| 6 |
+
import logging
|
| 7 |
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
compliance_bp = Blueprint('compliance', __name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
@compliance_bp.route('/anonymize/<ad_id>', methods=['POST'])
|
| 13 |
@login_required
|
|
|
|
| 16 |
try:
|
| 17 |
ad = Ad.query.get_or_404(ad_id)
|
| 18 |
ad.content = "REDACTED"
|
| 19 |
+
db.session.add(ad)
|
| 20 |
db.session.commit()
|
| 21 |
return jsonify({'status': 'success'})
|
| 22 |
except Exception as e:
|
| 23 |
db.session.rollback()
|
| 24 |
+
logger.error(f"Error anonymizing ad {ad_id}: {str(e)}")
|
| 25 |
+
return jsonify({'status': 'error', 'message': str(e)}), 500
|
| 26 |
+
finally:
|
| 27 |
+
db.session.close()
|
app/routes/google_ads.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Blueprint, render_template, request, jsonify, current_app
|
| 2 |
+
from flask_login import login_required, current_user
|
| 3 |
+
from app.services.google_scraper import GoogleAdsScraper
|
| 4 |
+
from app.models.google_ad import GoogleAd
|
| 5 |
+
from app.services.ai_processor import AIPipeline
|
| 6 |
+
from app import db, celery
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
google_ads_bp = Blueprint('google_ads', __name__, url_prefix='/google-ads')
|
| 11 |
+
|
| 12 |
+
@google_ads_bp.route('/', methods=['GET'])
|
| 13 |
+
@login_required
|
| 14 |
+
def index():
|
| 15 |
+
"""Google Ads dashboard page."""
|
| 16 |
+
return render_template('google_ads/index.html')
|
| 17 |
+
|
| 18 |
+
@google_ads_bp.route('/search', methods=['GET', 'POST'])
|
| 19 |
+
@login_required
|
| 20 |
+
def search_ads():
|
| 21 |
+
"""Search for Google Ads."""
|
| 22 |
+
if request.method == 'POST':
|
| 23 |
+
search_query = request.form.get('query')
|
| 24 |
+
num_pages = int(request.form.get('num_pages', 3))
|
| 25 |
+
|
| 26 |
+
# Start async task for scraping
|
| 27 |
+
task = scrape_google_search_ads.delay(search_query, num_pages, current_user.id)
|
| 28 |
+
|
| 29 |
+
return jsonify({
|
| 30 |
+
'status': 'success',
|
| 31 |
+
'message': 'Google Ads scraping started',
|
| 32 |
+
'task_id': task.id
|
| 33 |
+
})
|
| 34 |
+
|
| 35 |
+
# GET request - show search form
|
| 36 |
+
return render_template('google_ads/search.html')
|
| 37 |
+
|
| 38 |
+
@google_ads_bp.route('/display', methods=['GET', 'POST'])
|
| 39 |
+
@login_required
|
| 40 |
+
def display_ads():
|
| 41 |
+
"""Scrape display ads from a URL."""
|
| 42 |
+
if request.method == 'POST':
|
| 43 |
+
target_url = request.form.get('url')
|
| 44 |
+
scroll_count = int(request.form.get('scroll_count', 5))
|
| 45 |
+
|
| 46 |
+
# Start async task for scraping
|
| 47 |
+
task = scrape_google_display_ads.delay(target_url, scroll_count, current_user.id)
|
| 48 |
+
|
| 49 |
+
return jsonify({
|
| 50 |
+
'status': 'success',
|
| 51 |
+
'message': 'Google Display Ads scraping started',
|
| 52 |
+
'task_id': task.id
|
| 53 |
+
})
|
| 54 |
+
|
| 55 |
+
# GET request - show form
|
| 56 |
+
return render_template('google_ads/display.html')
|
| 57 |
+
|
| 58 |
+
@google_ads_bp.route('/results', methods=['GET'])
|
| 59 |
+
@login_required
|
| 60 |
+
def view_results():
|
| 61 |
+
"""View Google Ads results."""
|
| 62 |
+
ad_type = request.args.get('type', 'all')
|
| 63 |
+
query = request.args.get('query', '')
|
| 64 |
+
|
| 65 |
+
# Build query
|
| 66 |
+
ads_query = GoogleAd.query
|
| 67 |
+
|
| 68 |
+
if ad_type != 'all':
|
| 69 |
+
ads_query = ads_query.filter(GoogleAd.ad_type == ad_type)
|
| 70 |
+
|
| 71 |
+
if query:
|
| 72 |
+
ads_query = ads_query.filter(
|
| 73 |
+
(GoogleAd.title.ilike(f'%{query}%')) |
|
| 74 |
+
(GoogleAd.description.ilike(f'%{query}%')) |
|
| 75 |
+
(GoogleAd.search_query.ilike(f'%{query}%'))
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# Get results
|
| 79 |
+
ads = ads_query.order_by(GoogleAd.created_at.desc()).all()
|
| 80 |
+
|
| 81 |
+
return render_template('google_ads/results.html', ads=ads, ad_type=ad_type, query=query)
|
| 82 |
+
|
| 83 |
+
@google_ads_bp.route('/api/ads', methods=['GET'])
|
| 84 |
+
@login_required
|
| 85 |
+
def api_get_ads():
|
| 86 |
+
"""API endpoint to get Google Ads data."""
|
| 87 |
+
ad_type = request.args.get('type', 'all')
|
| 88 |
+
query = request.args.get('query', '')
|
| 89 |
+
limit = int(request.args.get('limit', 50))
|
| 90 |
+
|
| 91 |
+
# Build query
|
| 92 |
+
ads_query = GoogleAd.query
|
| 93 |
+
|
| 94 |
+
if ad_type != 'all':
|
| 95 |
+
ads_query = ads_query.filter(GoogleAd.ad_type == ad_type)
|
| 96 |
+
|
| 97 |
+
if query:
|
| 98 |
+
ads_query = ads_query.filter(
|
| 99 |
+
(GoogleAd.title.ilike(f'%{query}%')) |
|
| 100 |
+
(GoogleAd.description.ilike(f'%{query}%')) |
|
| 101 |
+
(GoogleAd.search_query.ilike(f'%{query}%'))
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
# Get results
|
| 105 |
+
ads = ads_query.order_by(GoogleAd.created_at.desc()).limit(limit).all()
|
| 106 |
+
|
| 107 |
+
# Convert to JSON
|
| 108 |
+
result = []
|
| 109 |
+
for ad in ads:
|
| 110 |
+
ad_data = {
|
| 111 |
+
'id': ad.id,
|
| 112 |
+
'ad_type': ad.ad_type,
|
| 113 |
+
'title': ad.title,
|
| 114 |
+
'description': ad.description,
|
| 115 |
+
'display_url': ad.display_url,
|
| 116 |
+
'target_url': ad.target_url,
|
| 117 |
+
'image_url': ad.image_url,
|
| 118 |
+
'position': ad.position,
|
| 119 |
+
'search_query': ad.search_query,
|
| 120 |
+
'page_url': ad.page_url,
|
| 121 |
+
'sentiment': ad.sentiment,
|
| 122 |
+
'created_at': ad.created_at.isoformat() if ad.created_at else None
|
| 123 |
+
}
|
| 124 |
+
result.append(ad_data)
|
| 125 |
+
|
| 126 |
+
return jsonify(result)
|
| 127 |
+
|
| 128 |
+
@celery.task
|
| 129 |
+
def scrape_google_search_ads(search_query, num_pages, user_id):
|
| 130 |
+
"""Celery task to scrape Google search ads."""
|
| 131 |
+
try:
|
| 132 |
+
scraper = GoogleAdsScraper()
|
| 133 |
+
ads_data = scraper.scrape_search_ads(search_query, num_pages)
|
| 134 |
+
|
| 135 |
+
# Process and store ads
|
| 136 |
+
ai_pipeline = AIPipeline()
|
| 137 |
+
|
| 138 |
+
for ad_data in ads_data:
|
| 139 |
+
# Create GoogleAd instance
|
| 140 |
+
ad = GoogleAd.from_search_ad_data(ad_data, search_query, user_id)
|
| 141 |
+
|
| 142 |
+
# Process with AI if there's content
|
| 143 |
+
if ad.title or ad.description:
|
| 144 |
+
try:
|
| 145 |
+
# Create a simple object with content for AI processing
|
| 146 |
+
ad_content = type('obj', (object,), {
|
| 147 |
+
'content': f"{ad.title} {ad.description}"
|
| 148 |
+
})
|
| 149 |
+
|
| 150 |
+
# Process with AI
|
| 151 |
+
ai_results = ai_pipeline.process_ad(ad_content)
|
| 152 |
+
ad.sentiment = ai_results.get('sentiment')
|
| 153 |
+
except Exception as e:
|
| 154 |
+
logger.error(f"Error processing ad with AI: {e}")
|
| 155 |
+
|
| 156 |
+
# Save to database
|
| 157 |
+
db.session.add(ad)
|
| 158 |
+
|
| 159 |
+
db.session.commit()
|
| 160 |
+
return {'status': 'success', 'count': len(ads_data)}
|
| 161 |
+
|
| 162 |
+
except Exception as e:
|
| 163 |
+
logger.error(f"Error in Google search ads scraping task: {e}")
|
| 164 |
+
db.session.rollback()
|
| 165 |
+
return {'status': 'error', 'message': str(e)}
|
| 166 |
+
|
| 167 |
+
@celery.task
|
| 168 |
+
def scrape_google_display_ads(target_url, scroll_count, user_id):
|
| 169 |
+
"""Celery task to scrape Google display ads."""
|
| 170 |
+
try:
|
| 171 |
+
scraper = GoogleAdsScraper()
|
| 172 |
+
ads_data = scraper.scrape_display_ads(target_url, scroll_count)
|
| 173 |
+
|
| 174 |
+
# Process and store ads
|
| 175 |
+
for ad_data in ads_data:
|
| 176 |
+
# Create GoogleAd instance
|
| 177 |
+
ad = GoogleAd.from_display_ad_data(ad_data, user_id)
|
| 178 |
+
|
| 179 |
+
# Save to database
|
| 180 |
+
db.session.add(ad)
|
| 181 |
+
|
| 182 |
+
db.session.commit()
|
| 183 |
+
return {'status': 'success', 'count': len(ads_data)}
|
| 184 |
+
|
| 185 |
+
except Exception as e:
|
| 186 |
+
logger.error(f"Error in Google display ads scraping task: {e}")
|
| 187 |
+
db.session.rollback()
|
| 188 |
+
return {'status': 'error', 'message': str(e)}
|
app/services/ai_processor.py
CHANGED
|
@@ -6,6 +6,10 @@ import logging
|
|
| 6 |
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
class AIPipeline:
|
| 10 |
def __init__(self):
|
| 11 |
try:
|
|
@@ -26,6 +30,9 @@ class AIPipeline:
|
|
| 26 |
raise
|
| 27 |
|
| 28 |
def process_ad(self, ad):
|
|
|
|
|
|
|
|
|
|
| 29 |
try:
|
| 30 |
results = {
|
| 31 |
"sentiment": self._analyze_sentiment(ad.content),
|
|
@@ -34,8 +41,8 @@ class AIPipeline:
|
|
| 34 |
}
|
| 35 |
return results
|
| 36 |
except Exception as e:
|
| 37 |
-
logger.error(f"Error processing ad: {e}")
|
| 38 |
-
|
| 39 |
|
| 40 |
def _analyze_sentiment(self, text):
|
| 41 |
try:
|
|
|
|
| 6 |
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
| 9 |
+
class ProcessingError(Exception):
|
| 10 |
+
"""Exception raised when ad processing fails."""
|
| 11 |
+
pass
|
| 12 |
+
|
| 13 |
class AIPipeline:
|
| 14 |
def __init__(self):
|
| 15 |
try:
|
|
|
|
| 30 |
raise
|
| 31 |
|
| 32 |
def process_ad(self, ad):
|
| 33 |
+
if not ad:
|
| 34 |
+
raise ValueError("Ad content cannot be empty")
|
| 35 |
+
|
| 36 |
try:
|
| 37 |
results = {
|
| 38 |
"sentiment": self._analyze_sentiment(ad.content),
|
|
|
|
| 41 |
}
|
| 42 |
return results
|
| 43 |
except Exception as e:
|
| 44 |
+
logger.error(f"Error processing ad: {str(e)}")
|
| 45 |
+
raise ProcessingError(f"Failed to process ad: {str(e)}")
|
| 46 |
|
| 47 |
def _analyze_sentiment(self, text):
|
| 48 |
try:
|
app/services/google_scraper.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from selenium import webdriver
|
| 2 |
+
from selenium.webdriver.common.by import By
|
| 3 |
+
from selenium.webdriver.chrome.service import Service
|
| 4 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 5 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 6 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 7 |
+
import time
|
| 8 |
+
from selenium.common.exceptions import TimeoutException, WebDriverException
|
| 9 |
+
from contextlib import contextmanager
|
| 10 |
+
import logging
|
| 11 |
+
import json
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
class GoogleAdsScraper:
|
| 17 |
+
def __init__(self, selenium_hub_url=None):
|
| 18 |
+
self.driver = None
|
| 19 |
+
self.selenium_hub_url = selenium_hub_url or os.getenv('SELENIUM_HUB_URL')
|
| 20 |
+
|
| 21 |
+
def _setup_driver(self):
|
| 22 |
+
options = webdriver.ChromeOptions()
|
| 23 |
+
options.add_argument("--headless")
|
| 24 |
+
options.add_argument("--no-sandbox")
|
| 25 |
+
options.add_argument("--disable-dev-shm-usage")
|
| 26 |
+
|
| 27 |
+
if self.selenium_hub_url:
|
| 28 |
+
logger.info(f"Using Selenium Hub at {self.selenium_hub_url}")
|
| 29 |
+
return webdriver.Remote(
|
| 30 |
+
command_executor=self.selenium_hub_url,
|
| 31 |
+
options=options
|
| 32 |
+
)
|
| 33 |
+
else:
|
| 34 |
+
return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
|
| 35 |
+
|
| 36 |
+
@contextmanager
|
| 37 |
+
def _get_driver(self):
|
| 38 |
+
try:
|
| 39 |
+
self.driver = self._setup_driver()
|
| 40 |
+
yield self.driver
|
| 41 |
+
finally:
|
| 42 |
+
if self.driver:
|
| 43 |
+
self.driver.quit()
|
| 44 |
+
|
| 45 |
+
def scrape_search_ads(self, search_query, num_pages=3):
|
| 46 |
+
"""Scrape Google search ads for a given query."""
|
| 47 |
+
with self._get_driver() as driver:
|
| 48 |
+
try:
|
| 49 |
+
url = f"https://www.google.com/search?q={search_query}"
|
| 50 |
+
driver.get(url)
|
| 51 |
+
driver.implicitly_wait(5)
|
| 52 |
+
|
| 53 |
+
ads = []
|
| 54 |
+
|
| 55 |
+
# Process first page
|
| 56 |
+
ads.extend(self._extract_search_ads(driver))
|
| 57 |
+
|
| 58 |
+
# Navigate through additional pages if requested
|
| 59 |
+
for page in range(2, num_pages + 1):
|
| 60 |
+
try:
|
| 61 |
+
next_button = driver.find_element(By.ID, "pnnext")
|
| 62 |
+
next_button.click()
|
| 63 |
+
time.sleep(2)
|
| 64 |
+
ads.extend(self._extract_search_ads(driver))
|
| 65 |
+
except Exception as e:
|
| 66 |
+
logger.warning(f"Could not navigate to page {page}: {e}")
|
| 67 |
+
break
|
| 68 |
+
|
| 69 |
+
return ads
|
| 70 |
+
|
| 71 |
+
except (TimeoutException, WebDriverException) as e:
|
| 72 |
+
logger.error(f"Error during Google Ads scraping: {e}")
|
| 73 |
+
return []
|
| 74 |
+
|
| 75 |
+
def _extract_search_ads(self, driver):
|
| 76 |
+
"""Extract ad data from the current search results page."""
|
| 77 |
+
ads = []
|
| 78 |
+
try:
|
| 79 |
+
# Look for ad containers
|
| 80 |
+
ad_elements = driver.find_elements(By.CSS_SELECTOR, "div.uEierd")
|
| 81 |
+
|
| 82 |
+
for ad in ad_elements:
|
| 83 |
+
try:
|
| 84 |
+
ad_data = {}
|
| 85 |
+
|
| 86 |
+
# Extract ad title
|
| 87 |
+
title_element = ad.find_element(By.CSS_SELECTOR, "div.CCgQ5.vCa9Yd.QfkTvb.MUxGbd.v0nnCb")
|
| 88 |
+
ad_data["title"] = title_element.text if title_element else ""
|
| 89 |
+
|
| 90 |
+
# Extract ad description
|
| 91 |
+
desc_element = ad.find_element(By.CSS_SELECTOR, "div.MUxGbd.yDYNvb.lyLwlc")
|
| 92 |
+
ad_data["description"] = desc_element.text if desc_element else ""
|
| 93 |
+
|
| 94 |
+
# Extract ad URL
|
| 95 |
+
url_element = ad.find_element(By.CSS_SELECTOR, "a.sVXRqc")
|
| 96 |
+
ad_data["display_url"] = url_element.text if url_element else ""
|
| 97 |
+
ad_data["target_url"] = url_element.get_attribute("href") if url_element else ""
|
| 98 |
+
|
| 99 |
+
# Extract ad position
|
| 100 |
+
ad_data["position"] = len(ads) + 1
|
| 101 |
+
|
| 102 |
+
# Add timestamp
|
| 103 |
+
ad_data["scrape_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
|
| 104 |
+
|
| 105 |
+
ads.append(ad_data)
|
| 106 |
+
except Exception as e:
|
| 107 |
+
logger.warning(f"Error extracting ad data: {e}")
|
| 108 |
+
continue
|
| 109 |
+
|
| 110 |
+
return ads
|
| 111 |
+
except Exception as e:
|
| 112 |
+
logger.error(f"Error extracting search ads: {e}")
|
| 113 |
+
return []
|
| 114 |
+
|
| 115 |
+
def scrape_display_ads(self, target_url, scroll_count=5):
|
| 116 |
+
"""Scrape Google display ads from a specific page."""
|
| 117 |
+
with self._get_driver() as driver:
|
| 118 |
+
try:
|
| 119 |
+
driver.get(target_url)
|
| 120 |
+
driver.implicitly_wait(5)
|
| 121 |
+
|
| 122 |
+
# Scroll to load dynamic content
|
| 123 |
+
for _ in range(scroll_count):
|
| 124 |
+
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
| 125 |
+
time.sleep(2)
|
| 126 |
+
|
| 127 |
+
# Extract iframe ads
|
| 128 |
+
iframes = driver.find_elements(By.CSS_SELECTOR, "iframe[id^='google_ads_iframe']")
|
| 129 |
+
|
| 130 |
+
ads = []
|
| 131 |
+
for iframe in iframes:
|
| 132 |
+
try:
|
| 133 |
+
# Switch to iframe context
|
| 134 |
+
driver.switch_to.frame(iframe)
|
| 135 |
+
|
| 136 |
+
# Extract ad data
|
| 137 |
+
ad_data = {
|
| 138 |
+
"iframe_id": iframe.get_attribute("id"),
|
| 139 |
+
"width": iframe.get_attribute("width"),
|
| 140 |
+
"height": iframe.get_attribute("height"),
|
| 141 |
+
"scrape_time": time.strftime("%Y-%m-%d %H:%M:%S"),
|
| 142 |
+
"page_url": target_url
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
# Try to get the ad image
|
| 146 |
+
try:
|
| 147 |
+
img = driver.find_element(By.CSS_SELECTOR, "img")
|
| 148 |
+
ad_data["image_url"] = img.get_attribute("src")
|
| 149 |
+
except:
|
| 150 |
+
ad_data["image_url"] = None
|
| 151 |
+
|
| 152 |
+
# Try to get the ad destination
|
| 153 |
+
try:
|
| 154 |
+
link = driver.find_element(By.CSS_SELECTOR, "a")
|
| 155 |
+
ad_data["target_url"] = link.get_attribute("href")
|
| 156 |
+
except:
|
| 157 |
+
ad_data["target_url"] = None
|
| 158 |
+
|
| 159 |
+
ads.append(ad_data)
|
| 160 |
+
|
| 161 |
+
# Switch back to main content
|
| 162 |
+
driver.switch_to.default_content()
|
| 163 |
+
except Exception as e:
|
| 164 |
+
logger.warning(f"Error processing iframe: {e}")
|
| 165 |
+
driver.switch_to.default_content()
|
| 166 |
+
continue
|
| 167 |
+
|
| 168 |
+
return ads
|
| 169 |
+
|
| 170 |
+
except (TimeoutException, WebDriverException) as e:
|
| 171 |
+
logger.error(f"Error during Google Display Ads scraping: {e}")
|
| 172 |
+
return []
|
app/templates/base.html
CHANGED
|
@@ -3,23 +3,55 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Facebook Ad Analytics</title>
|
|
|
|
|
|
|
|
|
|
| 7 |
<link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
|
|
|
|
| 8 |
</head>
|
| 9 |
<body>
|
| 10 |
-
<
|
| 11 |
-
<
|
| 12 |
-
|
| 13 |
-
<
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
<main>
|
| 19 |
{% block content %}{% endblock %}
|
| 20 |
</main>
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
</body>
|
| 25 |
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>{% block title %}Facebook Ad Analytics{% endblock %}</title>
|
| 7 |
+
<!-- Bootstrap CSS -->
|
| 8 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 9 |
+
<!-- Custom CSS -->
|
| 10 |
<link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
|
| 11 |
+
{% block head_extra %}{% endblock %}
|
| 12 |
</head>
|
| 13 |
<body>
|
| 14 |
+
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
|
| 15 |
+
<div class="container">
|
| 16 |
+
<a class="navbar-brand" href="{{ url_for('dashboard.index') }}">Ad Analytics</a>
|
| 17 |
+
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
|
| 18 |
+
<span class="navbar-toggler-icon"></span>
|
| 19 |
+
</button>
|
| 20 |
+
<div class="collapse navbar-collapse" id="navbarNav">
|
| 21 |
+
<ul class="navbar-nav me-auto">
|
| 22 |
+
<li class="nav-item">
|
| 23 |
+
<a class="nav-link" href="{{ url_for('dashboard.index') }}">Dashboard</a>
|
| 24 |
+
</li>
|
| 25 |
+
<li class="nav-item">
|
| 26 |
+
<a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
|
| 27 |
+
</li>
|
| 28 |
+
<li class="nav-item">
|
| 29 |
+
<a class="nav-link" href="{{ url_for('compliance.compliance_report') }}">Compliance</a>
|
| 30 |
+
</li>
|
| 31 |
+
</ul>
|
| 32 |
+
<ul class="navbar-nav">
|
| 33 |
+
<li class="nav-item">
|
| 34 |
+
<a class="nav-link" href="{{ url_for('auth.logout') }}">Logout</a>
|
| 35 |
+
</li>
|
| 36 |
+
</ul>
|
| 37 |
+
</div>
|
| 38 |
+
</div>
|
| 39 |
+
</nav>
|
| 40 |
+
|
| 41 |
<main>
|
| 42 |
{% block content %}{% endblock %}
|
| 43 |
</main>
|
| 44 |
+
|
| 45 |
+
<footer class="bg-dark text-white text-center py-3 mt-5">
|
| 46 |
+
<div class="container">
|
| 47 |
+
<p class="mb-0">© 2023 Ad Analytics Platform</p>
|
| 48 |
+
</div>
|
| 49 |
</footer>
|
| 50 |
+
|
| 51 |
+
<!-- Bootstrap Bundle with Popper -->
|
| 52 |
+
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
|
| 53 |
+
<!-- jQuery -->
|
| 54 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
| 55 |
+
{% block scripts %}{% endblock %}
|
| 56 |
</body>
|
| 57 |
</html>
|
app/templates/dashboard.html
CHANGED
|
@@ -1,30 +1,94 @@
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
{% block content %}
|
| 4 |
-
<div class="
|
| 5 |
-
<
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
<
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
<div class="
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</div>
|
| 20 |
-
{% endfor %}
|
| 21 |
-
</div>
|
| 22 |
|
| 23 |
-
<
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
</div>
|
| 29 |
|
| 30 |
<script>
|
|
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
{% block content %}
|
| 4 |
+
<div class="container mt-4">
|
| 5 |
+
<h1 class="mb-4">Facebook Ad Analytics Dashboard</h1>
|
| 6 |
+
|
| 7 |
+
<div class="row mb-4">
|
| 8 |
+
<div class="col-md-4">
|
| 9 |
+
<div class="card">
|
| 10 |
+
<div class="card-header bg-primary text-white">
|
| 11 |
+
<h5 class="card-title mb-0">Facebook Ads</h5>
|
| 12 |
+
</div>
|
| 13 |
+
<div class="card-body">
|
| 14 |
+
<p class="card-text">View and analyze Facebook ads.</p>
|
| 15 |
+
<a href="#" class="btn btn-primary">View Ads</a>
|
| 16 |
+
</div>
|
| 17 |
+
</div>
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<div class="col-md-4">
|
| 21 |
+
<div class="card">
|
| 22 |
+
<div class="card-header bg-success text-white">
|
| 23 |
+
<h5 class="card-title mb-0">Google Ads</h5>
|
| 24 |
+
</div>
|
| 25 |
+
<div class="card-body">
|
| 26 |
+
<p class="card-text">Scrape and analyze Google ads.</p>
|
| 27 |
+
<a href="{{ url_for('google_ads.index') }}" class="btn btn-success">Google Ads</a>
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
</div>
|
| 31 |
+
|
| 32 |
+
<div class="col-md-4">
|
| 33 |
+
<div class="card">
|
| 34 |
+
<div class="card-header bg-info text-white">
|
| 35 |
+
<h5 class="card-title mb-0">Compliance</h5>
|
| 36 |
+
</div>
|
| 37 |
+
<div class="card-body">
|
| 38 |
+
<p class="card-text">Generate compliance reports.</p>
|
| 39 |
+
<a href="#" class="btn btn-info">Compliance</a>
|
| 40 |
+
</div>
|
| 41 |
+
</div>
|
| 42 |
+
</div>
|
| 43 |
+
</div>
|
| 44 |
|
| 45 |
+
<div class="filters mb-4">
|
| 46 |
+
<div class="card">
|
| 47 |
+
<div class="card-header">
|
| 48 |
+
<h5 class="card-title mb-0">Filter Ads</h5>
|
| 49 |
+
</div>
|
| 50 |
+
<div class="card-body">
|
| 51 |
+
<div class="row">
|
| 52 |
+
<div class="col-md-5">
|
| 53 |
+
<input type="text" class="form-control" name="query" placeholder="Search ads..." value="{{ query }}">
|
| 54 |
+
</div>
|
| 55 |
+
<div class="col-md-5">
|
| 56 |
+
<select class="form-select" name="sentiment">
|
| 57 |
+
<option value="">All Sentiments</option>
|
| 58 |
+
<option value="Positive" {% if sentiment_filter == "Positive" %}selected{% endif %}>Positive</option>
|
| 59 |
+
<option value="Negative" {% if sentiment_filter == "Negative" %}selected{% endif %}>Negative</option>
|
| 60 |
+
</select>
|
| 61 |
+
</div>
|
| 62 |
+
<div class="col-md-2">
|
| 63 |
+
<button type="button" class="btn btn-primary w-100" onclick="applyFilters()">Apply</button>
|
| 64 |
+
</div>
|
| 65 |
+
</div>
|
| 66 |
+
</div>
|
| 67 |
+
</div>
|
| 68 |
+
</div>
|
| 69 |
+
|
| 70 |
+
<div class="ads-list">
|
| 71 |
+
{% for ad in ads.items %}
|
| 72 |
+
<div class="card mb-3">
|
| 73 |
+
<div class="card-body">
|
| 74 |
+
<p class="card-text">{{ ad.content }}</p>
|
| 75 |
+
<span class="badge {% if ad.sentiment == 'Positive' %}bg-success{% elif ad.sentiment == 'Negative' %}bg-danger{% else %}bg-secondary{% endif %}">
|
| 76 |
+
{{ ad.sentiment }}
|
| 77 |
+
</span>
|
| 78 |
+
</div>
|
| 79 |
+
</div>
|
| 80 |
+
{% endfor %}
|
| 81 |
</div>
|
|
|
|
|
|
|
| 82 |
|
| 83 |
+
<nav aria-label="Page navigation">
|
| 84 |
+
<ul class="pagination justify-content-center">
|
| 85 |
+
{% for p in range(1, ads.pages + 1) %}
|
| 86 |
+
<li class="page-item {% if p == ads.page %}active{% endif %}">
|
| 87 |
+
<a class="page-link" href="?page={{ p }}&query={{ query }}&sentiment={{ sentiment_filter }}">{{ p }}</a>
|
| 88 |
+
</li>
|
| 89 |
+
{% endfor %}
|
| 90 |
+
</ul>
|
| 91 |
+
</nav>
|
| 92 |
</div>
|
| 93 |
|
| 94 |
<script>
|
app/templates/google_ads/display.html
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Google Display Ads Scraper{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<nav aria-label="breadcrumb">
|
| 8 |
+
<ol class="breadcrumb">
|
| 9 |
+
<li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
|
| 10 |
+
<li class="breadcrumb-item active" aria-current="page">Display Ads</li>
|
| 11 |
+
</ol>
|
| 12 |
+
</nav>
|
| 13 |
+
|
| 14 |
+
<h1 class="mb-4">Google Display Ads Scraper</h1>
|
| 15 |
+
|
| 16 |
+
<div class="card">
|
| 17 |
+
<div class="card-header bg-success text-white">
|
| 18 |
+
<h5 class="card-title mb-0">Scrape Display Ads from a Website</h5>
|
| 19 |
+
</div>
|
| 20 |
+
<div class="card-body">
|
| 21 |
+
<form id="display-form" method="post">
|
| 22 |
+
<div class="mb-3">
|
| 23 |
+
<label for="url" class="form-label">Target URL</label>
|
| 24 |
+
<input type="url" class="form-control" id="url" name="url" required
|
| 25 |
+
placeholder="https://example.com">
|
| 26 |
+
<div class="form-text">Enter a website URL that displays Google Ads.</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
<div class="mb-3">
|
| 30 |
+
<label for="scroll_count" class="form-label">Scroll Count</label>
|
| 31 |
+
<input type="number" class="form-control" id="scroll_count" name="scroll_count"
|
| 32 |
+
value="5" min="1" max="20">
|
| 33 |
+
<div class="form-text">How many times to scroll the page to load dynamic content (1-20).</div>
|
| 34 |
+
</div>
|
| 35 |
+
|
| 36 |
+
<button type="submit" class="btn btn-success" id="submit-btn">Start Scraping</button>
|
| 37 |
+
</form>
|
| 38 |
+
|
| 39 |
+
<div id="result-container" class="mt-4 d-none">
|
| 40 |
+
<div class="alert alert-info">
|
| 41 |
+
<h5>Scraping in Progress</h5>
|
| 42 |
+
<p>Your Google Display Ads scraping task has been started. This may take a few minutes.</p>
|
| 43 |
+
<p>Task ID: <span id="task-id"></span></p>
|
| 44 |
+
<p>You can view results once the task is complete.</p>
|
| 45 |
+
<a href="{{ url_for('google_ads.view_results') }}?type=display" class="btn btn-info">View Results</a>
|
| 46 |
+
</div>
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
<div class="card mt-4">
|
| 52 |
+
<div class="card-header bg-info text-white">
|
| 53 |
+
<h5 class="card-title mb-0">Tips for Display Ad Scraping</h5>
|
| 54 |
+
</div>
|
| 55 |
+
<div class="card-body">
|
| 56 |
+
<ul>
|
| 57 |
+
<li>Choose websites that are known to display Google Ads.</li>
|
| 58 |
+
<li>News sites, blogs, and content-heavy websites often have more display ads.</li>
|
| 59 |
+
<li>Some websites may block automated scraping.</li>
|
| 60 |
+
<li>The tool looks for iframes with Google Ads signatures.</li>
|
| 61 |
+
<li>Not all ads may be captured due to dynamic loading or anti-scraping measures.</li>
|
| 62 |
+
</ul>
|
| 63 |
+
</div>
|
| 64 |
+
</div>
|
| 65 |
+
</div>
|
| 66 |
+
|
| 67 |
+
{% endblock %}
|
| 68 |
+
|
| 69 |
+
{% block scripts %}
|
| 70 |
+
<script>
|
| 71 |
+
$(document).ready(function() {
|
| 72 |
+
$('#display-form').on('submit', function(e) {
|
| 73 |
+
e.preventDefault();
|
| 74 |
+
|
| 75 |
+
const submitBtn = $('#submit-btn');
|
| 76 |
+
submitBtn.prop('disabled', true).html('<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Processing...');
|
| 77 |
+
|
| 78 |
+
$.ajax({
|
| 79 |
+
url: "{{ url_for('google_ads.display_ads') }}",
|
| 80 |
+
type: "POST",
|
| 81 |
+
data: $(this).serialize(),
|
| 82 |
+
success: function(response) {
|
| 83 |
+
$('#result-container').removeClass('d-none');
|
| 84 |
+
$('#task-id').text(response.task_id);
|
| 85 |
+
submitBtn.prop('disabled', false).text('Start Scraping');
|
| 86 |
+
},
|
| 87 |
+
error: function(xhr) {
|
| 88 |
+
alert('Error: ' + xhr.responseJSON.message);
|
| 89 |
+
submitBtn.prop('disabled', false).text('Start Scraping');
|
| 90 |
+
}
|
| 91 |
+
});
|
| 92 |
+
});
|
| 93 |
+
});
|
| 94 |
+
</script>
|
| 95 |
+
{% endblock %}
|
app/templates/google_ads/index.html
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Google Ads Analytics{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Google Ads Analytics</h1>
|
| 8 |
+
|
| 9 |
+
<div class="row">
|
| 10 |
+
<div class="col-md-6">
|
| 11 |
+
<div class="card mb-4">
|
| 12 |
+
<div class="card-header bg-primary text-white">
|
| 13 |
+
<h5 class="card-title mb-0">Search Ads</h5>
|
| 14 |
+
</div>
|
| 15 |
+
<div class="card-body">
|
| 16 |
+
<p class="card-text">Scrape and analyze Google Search Ads for specific keywords.</p>
|
| 17 |
+
<a href="{{ url_for('google_ads.search_ads') }}" class="btn btn-primary">Search Ads</a>
|
| 18 |
+
</div>
|
| 19 |
+
</div>
|
| 20 |
+
</div>
|
| 21 |
+
|
| 22 |
+
<div class="col-md-6">
|
| 23 |
+
<div class="card mb-4">
|
| 24 |
+
<div class="card-header bg-success text-white">
|
| 25 |
+
<h5 class="card-title mb-0">Display Ads</h5>
|
| 26 |
+
</div>
|
| 27 |
+
<div class="card-body">
|
| 28 |
+
<p class="card-text">Scrape and analyze Google Display Ads from specific websites.</p>
|
| 29 |
+
<a href="{{ url_for('google_ads.display_ads') }}" class="btn btn-success">Display Ads</a>
|
| 30 |
+
</div>
|
| 31 |
+
</div>
|
| 32 |
+
</div>
|
| 33 |
+
</div>
|
| 34 |
+
|
| 35 |
+
<div class="row">
|
| 36 |
+
<div class="col-12">
|
| 37 |
+
<div class="card">
|
| 38 |
+
<div class="card-header bg-info text-white">
|
| 39 |
+
<h5 class="card-title mb-0">View Results</h5>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<p class="card-text">View and analyze your collected Google Ads data.</p>
|
| 43 |
+
<a href="{{ url_for('google_ads.view_results') }}" class="btn btn-info">View Results</a>
|
| 44 |
+
</div>
|
| 45 |
+
</div>
|
| 46 |
+
</div>
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
{% endblock %}
|
app/templates/google_ads/results.html
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Google Ads Results{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<nav aria-label="breadcrumb">
|
| 8 |
+
<ol class="breadcrumb">
|
| 9 |
+
<li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
|
| 10 |
+
<li class="breadcrumb-item active" aria-current="page">Results</li>
|
| 11 |
+
</ol>
|
| 12 |
+
</nav>
|
| 13 |
+
|
| 14 |
+
<h1 class="mb-4">Google Ads Results</h1>
|
| 15 |
+
|
| 16 |
+
<div class="card mb-4">
|
| 17 |
+
<div class="card-header bg-info text-white">
|
| 18 |
+
<h5 class="card-title mb-0">Filter Results</h5>
|
| 19 |
+
</div>
|
| 20 |
+
<div class="card-body">
|
| 21 |
+
<form method="get" class="row g-3">
|
| 22 |
+
<div class="col-md-4">
|
| 23 |
+
<label for="type" class="form-label">Ad Type</label>
|
| 24 |
+
<select class="form-select" id="type" name="type">
|
| 25 |
+
<option value="all" {% if ad_type == 'all' %}selected{% endif %}>All Types</option>
|
| 26 |
+
<option value="search" {% if ad_type == 'search' %}selected{% endif %}>Search Ads</option>
|
| 27 |
+
<option value="display" {% if ad_type == 'display' %}selected{% endif %}>Display Ads</option>
|
| 28 |
+
</select>
|
| 29 |
+
</div>
|
| 30 |
+
<div class="col-md-6">
|
| 31 |
+
<label for="query" class="form-label">Search</label>
|
| 32 |
+
<input type="text" class="form-control" id="query" name="query" value="{{ query }}"
|
| 33 |
+
placeholder="Search in titles, descriptions, or keywords">
|
| 34 |
+
</div>
|
| 35 |
+
<div class="col-md-2 d-flex align-items-end">
|
| 36 |
+
<button type="submit" class="btn btn-primary w-100">Filter</button>
|
| 37 |
+
</div>
|
| 38 |
+
</form>
|
| 39 |
+
</div>
|
| 40 |
+
</div>
|
| 41 |
+
|
| 42 |
+
{% if ads %}
|
| 43 |
+
<div class="card">
|
| 44 |
+
<div class="card-header bg-success text-white">
|
| 45 |
+
<h5 class="card-title mb-0">{{ ads|length }} Ads Found</h5>
|
| 46 |
+
</div>
|
| 47 |
+
<div class="card-body p-0">
|
| 48 |
+
<div class="table-responsive">
|
| 49 |
+
<table class="table table-striped table-hover mb-0">
|
| 50 |
+
<thead>
|
| 51 |
+
<tr>
|
| 52 |
+
<th>Type</th>
|
| 53 |
+
<th>Title/Image</th>
|
| 54 |
+
<th>Description</th>
|
| 55 |
+
<th>URL</th>
|
| 56 |
+
<th>Sentiment</th>
|
| 57 |
+
<th>Date</th>
|
| 58 |
+
<th>Actions</th>
|
| 59 |
+
</tr>
|
| 60 |
+
</thead>
|
| 61 |
+
<tbody>
|
| 62 |
+
{% for ad in ads %}
|
| 63 |
+
<tr>
|
| 64 |
+
<td>
|
| 65 |
+
{% if ad.ad_type == 'search' %}
|
| 66 |
+
<span class="badge bg-primary">Search</span>
|
| 67 |
+
{% else %}
|
| 68 |
+
<span class="badge bg-success">Display</span>
|
| 69 |
+
{% endif %}
|
| 70 |
+
</td>
|
| 71 |
+
<td>
|
| 72 |
+
{% if ad.ad_type == 'search' %}
|
| 73 |
+
{{ ad.title }}
|
| 74 |
+
{% else %}
|
| 75 |
+
{% if ad.image_url %}
|
| 76 |
+
<img src="{{ ad.image_url }}" alt="Ad Image" style="max-width: 100px; max-height: 60px;">
|
| 77 |
+
{% else %}
|
| 78 |
+
<span class="text-muted">No image</span>
|
| 79 |
+
{% endif %}
|
| 80 |
+
{% endif %}
|
| 81 |
+
</td>
|
| 82 |
+
<td>
|
| 83 |
+
{% if ad.description %}
|
| 84 |
+
{{ ad.description|truncate(100) }}
|
| 85 |
+
{% else %}
|
| 86 |
+
<span class="text-muted">No description</span>
|
| 87 |
+
{% endif %}
|
| 88 |
+
</td>
|
| 89 |
+
<td>
|
| 90 |
+
{% if ad.target_url %}
|
| 91 |
+
<a href="{{ ad.target_url }}" target="_blank" rel="noopener noreferrer">
|
| 92 |
+
{{ ad.display_url or ad.target_url|truncate(30) }}
|
| 93 |
+
</a>
|
| 94 |
+
{% else %}
|
| 95 |
+
<span class="text-muted">No URL</span>
|
| 96 |
+
{% endif %}
|
| 97 |
+
</td>
|
| 98 |
+
<td>
|
| 99 |
+
{% if ad.sentiment %}
|
| 100 |
+
<span class="badge
|
| 101 |
+
{% if ad.sentiment.label == 'POSITIVE' %}bg-success
|
| 102 |
+
{% elif ad.sentiment.label == 'NEGATIVE' %}bg-danger
|
| 103 |
+
{% else %}bg-secondary{% endif %}">
|
| 104 |
+
{{ ad.sentiment.label }} ({{ (ad.sentiment.score * 100)|round(1) }}%)
|
| 105 |
+
</span>
|
| 106 |
+
{% else %}
|
| 107 |
+
<span class="text-muted">Not analyzed</span>
|
| 108 |
+
{% endif %}
|
| 109 |
+
</td>
|
| 110 |
+
<td>{{ ad.created_at.strftime('%Y-%m-%d %H:%M') }}</td>
|
| 111 |
+
<td>
|
| 112 |
+
<button class="btn btn-sm btn-info view-details"
|
| 113 |
+
data-id="{{ ad.id }}"
|
| 114 |
+
data-bs-toggle="modal"
|
| 115 |
+
data-bs-target="#adDetailsModal">
|
| 116 |
+
Details
|
| 117 |
+
</button>
|
| 118 |
+
</td>
|
| 119 |
+
</tr>
|
| 120 |
+
{% endfor %}
|
| 121 |
+
</tbody>
|
| 122 |
+
</table>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
{% else %}
|
| 127 |
+
<div class="alert alert-info">
|
| 128 |
+
<h5>No ads found</h5>
|
| 129 |
+
<p>No Google Ads match your search criteria. Try changing your filters or scrape some ads first.</p>
|
| 130 |
+
<div class="mt-3">
|
| 131 |
+
<a href="{{ url_for('google_ads.search_ads') }}" class="btn btn-primary me-2">Scrape Search Ads</a>
|
| 132 |
+
<a href="{{ url_for('google_ads.display_ads') }}" class="btn btn-success">Scrape Display Ads</a>
|
| 133 |
+
</div>
|
| 134 |
+
</div>
|
| 135 |
+
{% endif %}
|
| 136 |
+
</div>
|
| 137 |
+
|
| 138 |
+
<!-- Ad Details Modal -->
|
| 139 |
+
<div class="modal fade" id="adDetailsModal" tabindex="-1" aria-labelledby="adDetailsModalLabel" aria-hidden="true">
|
| 140 |
+
<div class="modal-dialog modal-lg">
|
| 141 |
+
<div class="modal-content">
|
| 142 |
+
<div class="modal-header">
|
| 143 |
+
<h5 class="modal-title" id="adDetailsModalLabel">Ad Details</h5>
|
| 144 |
+
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
|
| 145 |
+
</div>
|
| 146 |
+
<div class="modal-body">
|
| 147 |
+
<div id="adDetailsContent">
|
| 148 |
+
<div class="text-center">
|
| 149 |
+
<div class="spinner-border" role="status">
|
| 150 |
+
<span class="visually-hidden">Loading...</span>
|
| 151 |
+
</div>
|
| 152 |
+
</div>
|
| 153 |
+
</div>
|
| 154 |
+
</div>
|
| 155 |
+
<div class="modal-footer">
|
| 156 |
+
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
|
| 157 |
+
</div>
|
| 158 |
+
</div>
|
| 159 |
+
</div>
|
| 160 |
+
</div>
|
| 161 |
+
{% endblock %}
|
| 162 |
+
|
| 163 |
+
{% block scripts %}
|
| 164 |
+
<script>
|
| 165 |
+
$(document).ready(function() {
|
| 166 |
+
$('.view-details').on('click', function() {
|
| 167 |
+
const adId = $(this).data('id');
|
| 168 |
+
|
| 169 |
+
// Clear previous content and show loading spinner
|
| 170 |
+
$('#adDetailsContent').html('<div class="text-center"><div class="spinner-border" role="status"><span class="visually-hidden">Loading...</span></div></div>');
|
| 171 |
+
|
| 172 |
+
// Fetch ad details
|
| 173 |
+
$.ajax({
|
| 174 |
+
url: "{{ url_for('google_ads.api_get_ads') }}?id=" + adId,
|
| 175 |
+
type: "GET",
|
| 176 |
+
success: function(response) {
|
| 177 |
+
if (response && response.length > 0) {
|
| 178 |
+
const ad = response[0];
|
| 179 |
+
let content = '<div class="ad-details">';
|
| 180 |
+
|
| 181 |
+
// Ad type badge
|
| 182 |
+
content += '<div class="mb-3">';
|
| 183 |
+
if (ad.ad_type === 'search') {
|
| 184 |
+
content += '<span class="badge bg-primary">Search Ad</span>';
|
| 185 |
+
} else {
|
| 186 |
+
content += '<span class="badge bg-success">Display Ad</span>';
|
| 187 |
+
}
|
| 188 |
+
content += '</div>';
|
| 189 |
+
|
| 190 |
+
// Title and description
|
| 191 |
+
if (ad.title) {
|
| 192 |
+
content += '<h4>' + ad.title + '</h4>';
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
if (ad.description) {
|
| 196 |
+
content += '<p>' + ad.description + '</p>';
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
// Image for display ads
|
| 200 |
+
if (ad.image_url) {
|
| 201 |
+
content += '<div class="text-center mb-3"><img src="' + ad.image_url + '" alt="Ad Image" style="max-width: 100%;"></div>';
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
// URL
|
| 205 |
+
if (ad.target_url) {
|
| 206 |
+
content += '<div class="mb-3"><strong>URL:</strong> <a href="' + ad.target_url + '" target="_blank">' + (ad.display_url || ad.target_url) + '</a></div>';
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
// Search query for search ads
|
| 210 |
+
if (ad.search_query) {
|
| 211 |
+
content += '<div class="mb-3"><strong>Search Query:</strong> ' + ad.search_query + '</div>';
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
// Page URL for display ads
|
| 215 |
+
if (ad.page_url) {
|
| 216 |
+
content += '<div class="mb-3"><strong>Found on:</strong> <a href="' + ad.page_url + '" target="_blank">' + ad.page_url + '</a></div>';
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
// Position for search ads
|
| 220 |
+
if (ad.position) {
|
| 221 |
+
content += '<div class="mb-3"><strong>Position:</strong> ' + ad.position + '</div>';
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
// Sentiment analysis
|
| 225 |
+
if (ad.sentiment) {
|
| 226 |
+
let sentimentClass = 'bg-secondary';
|
| 227 |
+
if (ad.sentiment.label === 'POSITIVE') sentimentClass = 'bg-success';
|
| 228 |
+
if (ad.sentiment.label === 'NEGATIVE') sentimentClass = 'bg-danger';
|
| 229 |
+
|
| 230 |
+
content += '<div class="mb-3"><strong>Sentiment:</strong> ';
|
| 231 |
+
content += '<span class="badge ' + sentimentClass + '">' + ad.sentiment.label + ' (' + (ad.sentiment.score * 100).toFixed(1) + '%)</span></div>';
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
// Date
|
| 235 |
+
if (ad.created_at) {
|
| 236 |
+
content += '<div class="mb-3"><strong>Scraped on:</strong> ' + ad.created_at + '</div>';
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
content += '</div>';
|
| 240 |
+
$('#adDetailsContent').html(content);
|
| 241 |
+
} else {
|
| 242 |
+
$('#adDetailsContent').html('<div class="alert alert-danger">Ad details not found</div>');
|
| 243 |
+
}
|
| 244 |
+
},
|
| 245 |
+
error: function() {
|
| 246 |
+
$('#adDetailsContent').html('<div class="alert alert-danger">Error loading ad details</div>');
|
| 247 |
+
}
|
| 248 |
+
});
|
| 249 |
+
});
|
| 250 |
+
});
|
| 251 |
+
</script>
|
| 252 |
+
{% endblock %}
|
app/templates/google_ads/search.html
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Google Search Ads Scraper{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<nav aria-label="breadcrumb">
|
| 8 |
+
<ol class="breadcrumb">
|
| 9 |
+
<li class="breadcrumb-item"><a href="{{ url_for('google_ads.index') }}">Google Ads</a></li>
|
| 10 |
+
<li class="breadcrumb-item active" aria-current="page">Search Ads</li>
|
| 11 |
+
</ol>
|
| 12 |
+
</nav>
|
| 13 |
+
|
| 14 |
+
<h1 class="mb-4">Google Search Ads Scraper</h1>
|
| 15 |
+
|
| 16 |
+
<div class="card">
|
| 17 |
+
<div class="card-header bg-primary text-white">
|
| 18 |
+
<h5 class="card-title mb-0">Search for Google Ads</h5>
|
| 19 |
+
</div>
|
| 20 |
+
<div class="card-body">
|
| 21 |
+
<form id="search-form" method="post">
|
| 22 |
+
<div class="mb-3">
|
| 23 |
+
<label for="query" class="form-label">Search Query</label>
|
| 24 |
+
<input type="text" class="form-control" id="query" name="query" required
|
| 25 |
+
placeholder="Enter a keyword or phrase (e.g., 'buy shoes online')">
|
| 26 |
+
<div class="form-text">Enter a keyword that would trigger ads on Google Search.</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
<div class="mb-3">
|
| 30 |
+
<label for="num_pages" class="form-label">Number of Pages</label>
|
| 31 |
+
<input type="number" class="form-control" id="num_pages" name="num_pages"
|
| 32 |
+
value="3" min="1" max="10">
|
| 33 |
+
<div class="form-text">How many search result pages to scrape (1-10).</div>
|
| 34 |
+
</div>
|
| 35 |
+
|
| 36 |
+
<button type="submit" class="btn btn-primary" id="submit-btn">Start Scraping</button>
|
| 37 |
+
</form>
|
| 38 |
+
|
| 39 |
+
<div id="result-container" class="mt-4 d-none">
|
| 40 |
+
<div class="alert alert-info">
|
| 41 |
+
<h5>Scraping in Progress</h5>
|
| 42 |
+
<p>Your Google Ads scraping task has been started. This may take a few minutes.</p>
|
| 43 |
+
<p>Task ID: <span id="task-id"></span></p>
|
| 44 |
+
<p>You can view results once the task is complete.</p>
|
| 45 |
+
<a href="{{ url_for('google_ads.view_results') }}" class="btn btn-info">View Results</a>
|
| 46 |
+
</div>
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
+
</div>
|
| 51 |
+
|
| 52 |
+
{% endblock %}
|
| 53 |
+
|
| 54 |
+
{% block scripts %}
|
| 55 |
+
<script>
|
| 56 |
+
$(document).ready(function() {
|
| 57 |
+
$('#search-form').on('submit', function(e) {
|
| 58 |
+
e.preventDefault();
|
| 59 |
+
|
| 60 |
+
const submitBtn = $('#submit-btn');
|
| 61 |
+
submitBtn.prop('disabled', true).html('<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Processing...');
|
| 62 |
+
|
| 63 |
+
$.ajax({
|
| 64 |
+
url: "{{ url_for('google_ads.search_ads') }}",
|
| 65 |
+
type: "POST",
|
| 66 |
+
data: $(this).serialize(),
|
| 67 |
+
success: function(response) {
|
| 68 |
+
$('#result-container').removeClass('d-none');
|
| 69 |
+
$('#task-id').text(response.task_id);
|
| 70 |
+
submitBtn.prop('disabled', false).text('Start Scraping');
|
| 71 |
+
},
|
| 72 |
+
error: function(xhr) {
|
| 73 |
+
alert('Error: ' + xhr.responseJSON.message);
|
| 74 |
+
submitBtn.prop('disabled', false).text('Start Scraping');
|
| 75 |
+
}
|
| 76 |
+
});
|
| 77 |
+
});
|
| 78 |
+
});
|
| 79 |
+
</script>
|
| 80 |
+
{% endblock %}
|
app/utils/decorators.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from functools import wraps
|
| 2 |
-
from flask import redirect, url_for, flash
|
| 3 |
from flask_login import current_user
|
| 4 |
|
| 5 |
def admin_required(f):
|
|
|
|
| 1 |
from functools import wraps
|
| 2 |
+
from flask import redirect, url_for, flash, jsonify
|
| 3 |
from flask_login import current_user
|
| 4 |
|
| 5 |
def admin_required(f):
|
config.py
CHANGED
|
@@ -1,20 +1,52 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
class Config:
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:////tmp/app.db')
|
| 6 |
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
| 7 |
CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
|
| 8 |
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class DevelopmentConfig(Config):
|
| 13 |
DEBUG = True
|
|
|
|
| 14 |
|
| 15 |
class ProductionConfig(Config):
|
| 16 |
DEBUG = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
class TestingConfig(Config):
|
| 19 |
TESTING = True
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
|
| 4 |
class Config:
    """Base configuration shared by all environments.

    All settings are read from environment variables with development-friendly
    fallbacks.  Placeholder credentials print a loud warning so they are never
    silently carried into production (ProductionConfig rejects them outright).
    """

    # Use a default secret key for development, but warn about it.
    SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')
    if SECRET_KEY == 'dev-secret-key-change-in-production':
        print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")

    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:////tmp/app.db')
    # Event system is unused by the app; disabling it avoids overhead.
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
    CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')

    # Use a mock API key for development if not provided.
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
    if OPENAI_API_KEY == 'sk-mock-key-for-development':
        print("WARNING: Using mock OPENAI_API_KEY. API calls will fail in production.")

    # Ensure the instance path exists.  Guarded so that an unwritable
    # filesystem (read-only container, restricted /tmp) degrades to a
    # warning instead of raising OSError at import time and breaking
    # every consumer of this module.
    INSTANCE_PATH = os.getenv('INSTANCE_PATH', '/tmp/instance')
    try:
        Path(INSTANCE_PATH).mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        print(f"WARNING: Could not create instance path {INSTANCE_PATH}: {exc}")
|
| 23 |
|
| 24 |
class DevelopmentConfig(Config):
    """Settings for local development: debug on, test mode off."""

    TESTING = False
    DEBUG = True
|
| 27 |
|
| 28 |
class ProductionConfig(Config):
    """Production settings: debug disabled and placeholder credentials rejected.

    Instantiating this class (as ``get_config`` does) validates that the
    development fallback values were replaced with real secrets.
    """

    DEBUG = False
    TESTING = False

    def __init__(self):
        # Refuse to start when a development placeholder leaked into prod.
        # Checks run in declaration order so error precedence matches the
        # original implementation.
        required_real_values = (
            ('SECRET_KEY', 'dev-secret-key-change-in-production'),
            ('OPENAI_API_KEY', 'sk-mock-key-for-development'),
        )
        for attr, placeholder in required_real_values:
            if getattr(self, attr) == placeholder:
                raise ValueError(f"Production requires a real {attr}")
|
| 38 |
|
| 39 |
class TestingConfig(Config):
    """Settings for the test suite: in-memory database, debug and test mode on."""

    DEBUG = True
    TESTING = True
    # Each test run gets a throwaway SQLite database.
    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
|
| 43 |
+
|
| 44 |
+
def get_config():
    """Return the configuration instance matching the FLASK_ENV variable.

    ``production`` and ``testing`` select their dedicated configs; any other
    value (including an unset variable) falls back to development settings.
    """
    selected = {
        'production': ProductionConfig,
        'testing': TestingConfig,
    }.get(os.getenv('FLASK_ENV', 'development'), DevelopmentConfig)
    return selected()
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
pip
|
|
|
|
|
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
Copyright (c) 2017-2021 Ingy döt Net
|
| 2 |
-
Copyright (c) 2006-2016 Kirill Simonov
|
| 3 |
-
|
| 4 |
-
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
| 5 |
-
this software and associated documentation files (the "Software"), to deal in
|
| 6 |
-
the Software without restriction, including without limitation the rights to
|
| 7 |
-
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
| 8 |
-
of the Software, and to permit persons to whom the Software is furnished to do
|
| 9 |
-
so, subject to the following conditions:
|
| 10 |
-
|
| 11 |
-
The above copyright notice and this permission notice shall be included in all
|
| 12 |
-
copies or substantial portions of the Software.
|
| 13 |
-
|
| 14 |
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 17 |
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 19 |
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 20 |
-
SOFTWARE.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
Metadata-Version: 2.1
|
| 2 |
-
Name: PyYAML
|
| 3 |
-
Version: 6.0.2
|
| 4 |
-
Summary: YAML parser and emitter for Python
|
| 5 |
-
Home-page: https://pyyaml.org/
|
| 6 |
-
Download-URL: https://pypi.org/project/PyYAML/
|
| 7 |
-
Author: Kirill Simonov
|
| 8 |
-
Author-email: xi@resolvent.net
|
| 9 |
-
License: MIT
|
| 10 |
-
Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
|
| 11 |
-
Project-URL: CI, https://github.com/yaml/pyyaml/actions
|
| 12 |
-
Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
|
| 13 |
-
Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
|
| 14 |
-
Project-URL: Source Code, https://github.com/yaml/pyyaml
|
| 15 |
-
Platform: Any
|
| 16 |
-
Classifier: Development Status :: 5 - Production/Stable
|
| 17 |
-
Classifier: Intended Audience :: Developers
|
| 18 |
-
Classifier: License :: OSI Approved :: MIT License
|
| 19 |
-
Classifier: Operating System :: OS Independent
|
| 20 |
-
Classifier: Programming Language :: Cython
|
| 21 |
-
Classifier: Programming Language :: Python
|
| 22 |
-
Classifier: Programming Language :: Python :: 3
|
| 23 |
-
Classifier: Programming Language :: Python :: 3.8
|
| 24 |
-
Classifier: Programming Language :: Python :: 3.9
|
| 25 |
-
Classifier: Programming Language :: Python :: 3.10
|
| 26 |
-
Classifier: Programming Language :: Python :: 3.11
|
| 27 |
-
Classifier: Programming Language :: Python :: 3.12
|
| 28 |
-
Classifier: Programming Language :: Python :: 3.13
|
| 29 |
-
Classifier: Programming Language :: Python :: Implementation :: CPython
|
| 30 |
-
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
| 31 |
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
| 32 |
-
Classifier: Topic :: Text Processing :: Markup
|
| 33 |
-
Requires-Python: >=3.8
|
| 34 |
-
License-File: LICENSE
|
| 35 |
-
|
| 36 |
-
YAML is a data serialization format designed for human readability
|
| 37 |
-
and interaction with scripting languages. PyYAML is a YAML parser
|
| 38 |
-
and emitter for Python.
|
| 39 |
-
|
| 40 |
-
PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
|
| 41 |
-
support, capable extension API, and sensible error messages. PyYAML
|
| 42 |
-
supports standard YAML tags and provides Python-specific tags that
|
| 43 |
-
allow to represent an arbitrary Python object.
|
| 44 |
-
|
| 45 |
-
PyYAML is applicable for a broad range of tasks from complex
|
| 46 |
-
configuration files to object serialization and persistence.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
-
PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
|
| 3 |
-
PyYAML-6.0.2.dist-info/METADATA,sha256=9lwXqTOrXPts-jI2Lo5UwuaAYo0hiRA0BZqjch0WjAk,2106
|
| 4 |
-
PyYAML-6.0.2.dist-info/RECORD,,
|
| 5 |
-
PyYAML-6.0.2.dist-info/WHEEL,sha256=c7SWG1_hRvc9HXHEkmWlTu1Jr4WpzRucfzqTP-_8q0s,102
|
| 6 |
-
PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
|
| 7 |
-
_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
|
| 8 |
-
_yaml/__pycache__/__init__.cpython-312.pyc,,
|
| 9 |
-
yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
|
| 10 |
-
yaml/__pycache__/__init__.cpython-312.pyc,,
|
| 11 |
-
yaml/__pycache__/composer.cpython-312.pyc,,
|
| 12 |
-
yaml/__pycache__/constructor.cpython-312.pyc,,
|
| 13 |
-
yaml/__pycache__/cyaml.cpython-312.pyc,,
|
| 14 |
-
yaml/__pycache__/dumper.cpython-312.pyc,,
|
| 15 |
-
yaml/__pycache__/emitter.cpython-312.pyc,,
|
| 16 |
-
yaml/__pycache__/error.cpython-312.pyc,,
|
| 17 |
-
yaml/__pycache__/events.cpython-312.pyc,,
|
| 18 |
-
yaml/__pycache__/loader.cpython-312.pyc,,
|
| 19 |
-
yaml/__pycache__/nodes.cpython-312.pyc,,
|
| 20 |
-
yaml/__pycache__/parser.cpython-312.pyc,,
|
| 21 |
-
yaml/__pycache__/reader.cpython-312.pyc,,
|
| 22 |
-
yaml/__pycache__/representer.cpython-312.pyc,,
|
| 23 |
-
yaml/__pycache__/resolver.cpython-312.pyc,,
|
| 24 |
-
yaml/__pycache__/scanner.cpython-312.pyc,,
|
| 25 |
-
yaml/__pycache__/serializer.cpython-312.pyc,,
|
| 26 |
-
yaml/__pycache__/tokens.cpython-312.pyc,,
|
| 27 |
-
yaml/_yaml.cp312-win_amd64.pyd,sha256=Bx7e_LEQx7cnd1_A9_nClp3X77g-_Lw1aoAAtYZbwWk,263680
|
| 28 |
-
yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
|
| 29 |
-
yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
|
| 30 |
-
yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
|
| 31 |
-
yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
|
| 32 |
-
yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
|
| 33 |
-
yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
|
| 34 |
-
yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
|
| 35 |
-
yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
|
| 36 |
-
yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
|
| 37 |
-
yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
|
| 38 |
-
yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
|
| 39 |
-
yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
|
| 40 |
-
yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
|
| 41 |
-
yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
|
| 42 |
-
yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
|
| 43 |
-
yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
Wheel-Version: 1.0
|
| 2 |
-
Generator: bdist_wheel (0.44.0)
|
| 3 |
-
Root-Is-Purelib: false
|
| 4 |
-
Tag: cp312-cp312-win_amd64
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
_yaml
|
| 2 |
-
yaml
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/__pycache__/typing_extensions.cpython-312.pyc
DELETED
|
Binary file (139 kB)
|
|
|
hf_env/Lib/site-packages/_yaml/__init__.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
# This is a stub package designed to roughly emulate the _yaml
|
| 2 |
-
# extension module, which previously existed as a standalone module
|
| 3 |
-
# and has been moved into the `yaml` package namespace.
|
| 4 |
-
# It does not perfectly mimic its old counterpart, but should get
|
| 5 |
-
# close enough for anyone who's relying on it even when they shouldn't.
|
| 6 |
-
import yaml
|
| 7 |
-
|
| 8 |
-
# in some circumstances, the yaml module we imoprted may be from a different version, so we need
|
| 9 |
-
# to tread carefully when poking at it here (it may not have the attributes we expect)
|
| 10 |
-
if not getattr(yaml, '__with_libyaml__', False):
|
| 11 |
-
from sys import version_info
|
| 12 |
-
|
| 13 |
-
exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
|
| 14 |
-
raise exc("No module named '_yaml'")
|
| 15 |
-
else:
|
| 16 |
-
from yaml._yaml import *
|
| 17 |
-
import warnings
|
| 18 |
-
warnings.warn(
|
| 19 |
-
'The _yaml extension module is now located at yaml._yaml'
|
| 20 |
-
' and its location is subject to change. To use the'
|
| 21 |
-
' LibYAML-based parser and emitter, import from `yaml`:'
|
| 22 |
-
' `from yaml import CLoader as Loader, CDumper as Dumper`.',
|
| 23 |
-
DeprecationWarning
|
| 24 |
-
)
|
| 25 |
-
del warnings
|
| 26 |
-
# Don't `del yaml` here because yaml is actually an existing
|
| 27 |
-
# namespace member of _yaml.
|
| 28 |
-
|
| 29 |
-
__name__ = '_yaml'
|
| 30 |
-
# If the module is top-level (i.e. not a part of any specific package)
|
| 31 |
-
# then the attribute should be set to ''.
|
| 32 |
-
# https://docs.python.org/3.8/library/types.html
|
| 33 |
-
__package__ = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc
DELETED
|
Binary file (858 Bytes)
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
pip
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
This package contains a modified version of ca-bundle.crt:
|
| 2 |
-
|
| 3 |
-
ca-bundle.crt -- Bundle of CA Root Certificates
|
| 4 |
-
|
| 5 |
-
This is a bundle of X.509 certificates of public Certificate Authorities
|
| 6 |
-
(CA). These were automatically extracted from Mozilla's root certificates
|
| 7 |
-
file (certdata.txt). This file can be found in the mozilla source tree:
|
| 8 |
-
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
|
| 9 |
-
It contains the certificates in PEM format and therefore
|
| 10 |
-
can be directly used with curl / libcurl / php_curl, or with
|
| 11 |
-
an Apache+mod_ssl webserver for SSL client authentication.
|
| 12 |
-
Just configure this file as the SSLCACertificateFile.#
|
| 13 |
-
|
| 14 |
-
***** BEGIN LICENSE BLOCK *****
|
| 15 |
-
This Source Code Form is subject to the terms of the Mozilla Public License,
|
| 16 |
-
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
|
| 17 |
-
one at http://mozilla.org/MPL/2.0/.
|
| 18 |
-
|
| 19 |
-
***** END LICENSE BLOCK *****
|
| 20 |
-
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA
DELETED
|
@@ -1,77 +0,0 @@
|
|
| 1 |
-
Metadata-Version: 2.2
|
| 2 |
-
Name: certifi
|
| 3 |
-
Version: 2025.1.31
|
| 4 |
-
Summary: Python package for providing Mozilla's CA Bundle.
|
| 5 |
-
Home-page: https://github.com/certifi/python-certifi
|
| 6 |
-
Author: Kenneth Reitz
|
| 7 |
-
Author-email: me@kennethreitz.com
|
| 8 |
-
License: MPL-2.0
|
| 9 |
-
Project-URL: Source, https://github.com/certifi/python-certifi
|
| 10 |
-
Classifier: Development Status :: 5 - Production/Stable
|
| 11 |
-
Classifier: Intended Audience :: Developers
|
| 12 |
-
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
|
| 13 |
-
Classifier: Natural Language :: English
|
| 14 |
-
Classifier: Programming Language :: Python
|
| 15 |
-
Classifier: Programming Language :: Python :: 3
|
| 16 |
-
Classifier: Programming Language :: Python :: 3 :: Only
|
| 17 |
-
Classifier: Programming Language :: Python :: 3.6
|
| 18 |
-
Classifier: Programming Language :: Python :: 3.7
|
| 19 |
-
Classifier: Programming Language :: Python :: 3.8
|
| 20 |
-
Classifier: Programming Language :: Python :: 3.9
|
| 21 |
-
Classifier: Programming Language :: Python :: 3.10
|
| 22 |
-
Classifier: Programming Language :: Python :: 3.11
|
| 23 |
-
Classifier: Programming Language :: Python :: 3.12
|
| 24 |
-
Classifier: Programming Language :: Python :: 3.13
|
| 25 |
-
Requires-Python: >=3.6
|
| 26 |
-
License-File: LICENSE
|
| 27 |
-
Dynamic: author
|
| 28 |
-
Dynamic: author-email
|
| 29 |
-
Dynamic: classifier
|
| 30 |
-
Dynamic: description
|
| 31 |
-
Dynamic: home-page
|
| 32 |
-
Dynamic: license
|
| 33 |
-
Dynamic: project-url
|
| 34 |
-
Dynamic: requires-python
|
| 35 |
-
Dynamic: summary
|
| 36 |
-
|
| 37 |
-
Certifi: Python SSL Certificates
|
| 38 |
-
================================
|
| 39 |
-
|
| 40 |
-
Certifi provides Mozilla's carefully curated collection of Root Certificates for
|
| 41 |
-
validating the trustworthiness of SSL certificates while verifying the identity
|
| 42 |
-
of TLS hosts. It has been extracted from the `Requests`_ project.
|
| 43 |
-
|
| 44 |
-
Installation
|
| 45 |
-
------------
|
| 46 |
-
|
| 47 |
-
``certifi`` is available on PyPI. Simply install it with ``pip``::
|
| 48 |
-
|
| 49 |
-
$ pip install certifi
|
| 50 |
-
|
| 51 |
-
Usage
|
| 52 |
-
-----
|
| 53 |
-
|
| 54 |
-
To reference the installed certificate authority (CA) bundle, you can use the
|
| 55 |
-
built-in function::
|
| 56 |
-
|
| 57 |
-
>>> import certifi
|
| 58 |
-
|
| 59 |
-
>>> certifi.where()
|
| 60 |
-
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
|
| 61 |
-
|
| 62 |
-
Or from the command line::
|
| 63 |
-
|
| 64 |
-
$ python -m certifi
|
| 65 |
-
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
|
| 66 |
-
|
| 67 |
-
Enjoy!
|
| 68 |
-
|
| 69 |
-
.. _`Requests`: https://requests.readthedocs.io/en/master/
|
| 70 |
-
|
| 71 |
-
Addition/Removal of Certificates
|
| 72 |
-
--------------------------------
|
| 73 |
-
|
| 74 |
-
Certifi does not support any addition/removal or other modification of the
|
| 75 |
-
CA trust store content. This project is intended to provide a reliable and
|
| 76 |
-
highly portable root of trust to python deployments. Look to upstream projects
|
| 77 |
-
for methods to use alternate trust.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
certifi-2025.1.31.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
-
certifi-2025.1.31.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
|
| 3 |
-
certifi-2025.1.31.dist-info/METADATA,sha256=t5kcT5aGu0dQ6_psUNZYTqnC0uCRnponewm3uYjeHbg,2451
|
| 4 |
-
certifi-2025.1.31.dist-info/RECORD,,
|
| 5 |
-
certifi-2025.1.31.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
| 6 |
-
certifi-2025.1.31.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
|
| 7 |
-
certifi/__init__.py,sha256=neIaAf7BM36ygmQCmy-ZsSyjnvjWghFeu13wwEAnjj0,94
|
| 8 |
-
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
|
| 9 |
-
certifi/__pycache__/__init__.cpython-312.pyc,,
|
| 10 |
-
certifi/__pycache__/__main__.cpython-312.pyc,,
|
| 11 |
-
certifi/__pycache__/core.cpython-312.pyc,,
|
| 12 |
-
certifi/cacert.pem,sha256=xVsh-Qf3-G1IrdCTVS-1ZRdJ_1-GBQjMu0I9bB-9gMc,297255
|
| 13 |
-
certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
|
| 14 |
-
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
Wheel-Version: 1.0
|
| 2 |
-
Generator: setuptools (75.8.0)
|
| 3 |
-
Root-Is-Purelib: true
|
| 4 |
-
Tag: py3-none-any
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
certifi
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi/__init__.py
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
from .core import contents, where
|
| 2 |
-
|
| 3 |
-
__all__ = ["contents", "where"]
|
| 4 |
-
__version__ = "2025.01.31"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi/__main__.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
import argparse
|
| 2 |
-
|
| 3 |
-
from certifi import contents, where
|
| 4 |
-
|
| 5 |
-
parser = argparse.ArgumentParser()
|
| 6 |
-
parser.add_argument("-c", "--contents", action="store_true")
|
| 7 |
-
args = parser.parse_args()
|
| 8 |
-
|
| 9 |
-
if args.contents:
|
| 10 |
-
print(contents())
|
| 11 |
-
else:
|
| 12 |
-
print(where())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi/__pycache__/__init__.cpython-312.pyc
DELETED
|
Binary file (305 Bytes)
|
|
|
hf_env/Lib/site-packages/certifi/__pycache__/__main__.cpython-312.pyc
DELETED
|
Binary file (620 Bytes)
|
|
|
hf_env/Lib/site-packages/certifi/__pycache__/core.cpython-312.pyc
DELETED
|
Binary file (3.18 kB)
|
|
|
hf_env/Lib/site-packages/certifi/cacert.pem
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_env/Lib/site-packages/certifi/core.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
certifi.py
|
| 3 |
-
~~~~~~~~~~
|
| 4 |
-
|
| 5 |
-
This module returns the installation location of cacert.pem or its contents.
|
| 6 |
-
"""
|
| 7 |
-
import sys
|
| 8 |
-
import atexit
|
| 9 |
-
|
| 10 |
-
def exit_cacert_ctx() -> None:
|
| 11 |
-
_CACERT_CTX.__exit__(None, None, None) # type: ignore[union-attr]
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
if sys.version_info >= (3, 11):
|
| 15 |
-
|
| 16 |
-
from importlib.resources import as_file, files
|
| 17 |
-
|
| 18 |
-
_CACERT_CTX = None
|
| 19 |
-
_CACERT_PATH = None
|
| 20 |
-
|
| 21 |
-
def where() -> str:
|
| 22 |
-
# This is slightly terrible, but we want to delay extracting the file
|
| 23 |
-
# in cases where we're inside of a zipimport situation until someone
|
| 24 |
-
# actually calls where(), but we don't want to re-extract the file
|
| 25 |
-
# on every call of where(), so we'll do it once then store it in a
|
| 26 |
-
# global variable.
|
| 27 |
-
global _CACERT_CTX
|
| 28 |
-
global _CACERT_PATH
|
| 29 |
-
if _CACERT_PATH is None:
|
| 30 |
-
# This is slightly janky, the importlib.resources API wants you to
|
| 31 |
-
# manage the cleanup of this file, so it doesn't actually return a
|
| 32 |
-
# path, it returns a context manager that will give you the path
|
| 33 |
-
# when you enter it and will do any cleanup when you leave it. In
|
| 34 |
-
# the common case of not needing a temporary file, it will just
|
| 35 |
-
# return the file system location and the __exit__() is a no-op.
|
| 36 |
-
#
|
| 37 |
-
# We also have to hold onto the actual context manager, because
|
| 38 |
-
# it will do the cleanup whenever it gets garbage collected, so
|
| 39 |
-
# we will also store that at the global level as well.
|
| 40 |
-
_CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
|
| 41 |
-
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
| 42 |
-
atexit.register(exit_cacert_ctx)
|
| 43 |
-
|
| 44 |
-
return _CACERT_PATH
|
| 45 |
-
|
| 46 |
-
def contents() -> str:
|
| 47 |
-
return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
|
| 48 |
-
|
| 49 |
-
elif sys.version_info >= (3, 7):
|
| 50 |
-
|
| 51 |
-
from importlib.resources import path as get_path, read_text
|
| 52 |
-
|
| 53 |
-
_CACERT_CTX = None
|
| 54 |
-
_CACERT_PATH = None
|
| 55 |
-
|
| 56 |
-
def where() -> str:
|
| 57 |
-
# This is slightly terrible, but we want to delay extracting the
|
| 58 |
-
# file in cases where we're inside of a zipimport situation until
|
| 59 |
-
# someone actually calls where(), but we don't want to re-extract
|
| 60 |
-
# the file on every call of where(), so we'll do it once then store
|
| 61 |
-
# it in a global variable.
|
| 62 |
-
global _CACERT_CTX
|
| 63 |
-
global _CACERT_PATH
|
| 64 |
-
if _CACERT_PATH is None:
|
| 65 |
-
# This is slightly janky, the importlib.resources API wants you
|
| 66 |
-
# to manage the cleanup of this file, so it doesn't actually
|
| 67 |
-
# return a path, it returns a context manager that will give
|
| 68 |
-
# you the path when you enter it and will do any cleanup when
|
| 69 |
-
# you leave it. In the common case of not needing a temporary
|
| 70 |
-
# file, it will just return the file system location and the
|
| 71 |
-
# __exit__() is a no-op.
|
| 72 |
-
#
|
| 73 |
-
# We also have to hold onto the actual context manager, because
|
| 74 |
-
# it will do the cleanup whenever it gets garbage collected, so
|
| 75 |
-
# we will also store that at the global level as well.
|
| 76 |
-
_CACERT_CTX = get_path("certifi", "cacert.pem")
|
| 77 |
-
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
| 78 |
-
atexit.register(exit_cacert_ctx)
|
| 79 |
-
|
| 80 |
-
return _CACERT_PATH
|
| 81 |
-
|
| 82 |
-
def contents() -> str:
|
| 83 |
-
return read_text("certifi", "cacert.pem", encoding="ascii")
|
| 84 |
-
|
| 85 |
-
else:
|
| 86 |
-
import os
|
| 87 |
-
import types
|
| 88 |
-
from typing import Union
|
| 89 |
-
|
| 90 |
-
Package = Union[types.ModuleType, str]
|
| 91 |
-
Resource = Union[str, "os.PathLike"]
|
| 92 |
-
|
| 93 |
-
# This fallback will work for Python versions prior to 3.7 that lack the
|
| 94 |
-
# importlib.resources module but relies on the existing `where` function
|
| 95 |
-
# so won't address issues with environments like PyOxidizer that don't set
|
| 96 |
-
# __file__ on modules.
|
| 97 |
-
def read_text(
|
| 98 |
-
package: Package,
|
| 99 |
-
resource: Resource,
|
| 100 |
-
encoding: str = 'utf-8',
|
| 101 |
-
errors: str = 'strict'
|
| 102 |
-
) -> str:
|
| 103 |
-
with open(where(), encoding=encoding) as data:
|
| 104 |
-
return data.read()
|
| 105 |
-
|
| 106 |
-
# If we don't have importlib.resources, then we will just do the old logic
|
| 107 |
-
# of assuming we're on the filesystem and munge the path directly.
|
| 108 |
-
def where() -> str:
|
| 109 |
-
f = os.path.dirname(__file__)
|
| 110 |
-
|
| 111 |
-
return os.path.join(f, "cacert.pem")
|
| 112 |
-
|
| 113 |
-
def contents() -> str:
|
| 114 |
-
return read_text("certifi", "cacert.pem", encoding="ascii")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/certifi/py.typed
DELETED
|
File without changes
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
pip
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
MIT License
|
| 2 |
-
|
| 3 |
-
Copyright (c) 2025 TAHRI Ahmed R.
|
| 4 |
-
|
| 5 |
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
-
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
-
in the Software without restriction, including without limitation the rights
|
| 8 |
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
-
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
-
furnished to do so, subject to the following conditions:
|
| 11 |
-
|
| 12 |
-
The above copyright notice and this permission notice shall be included in all
|
| 13 |
-
copies or substantial portions of the Software.
|
| 14 |
-
|
| 15 |
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
-
SOFTWARE.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA
DELETED
|
@@ -1,721 +0,0 @@
|
|
| 1 |
-
Metadata-Version: 2.1
|
| 2 |
-
Name: charset-normalizer
|
| 3 |
-
Version: 3.4.1
|
| 4 |
-
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
| 5 |
-
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
| 6 |
-
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
| 7 |
-
License: MIT
|
| 8 |
-
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
|
| 9 |
-
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
|
| 10 |
-
Project-URL: Code, https://github.com/jawah/charset_normalizer
|
| 11 |
-
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
|
| 12 |
-
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
| 13 |
-
Classifier: Development Status :: 5 - Production/Stable
|
| 14 |
-
Classifier: Intended Audience :: Developers
|
| 15 |
-
Classifier: License :: OSI Approved :: MIT License
|
| 16 |
-
Classifier: Operating System :: OS Independent
|
| 17 |
-
Classifier: Programming Language :: Python
|
| 18 |
-
Classifier: Programming Language :: Python :: 3
|
| 19 |
-
Classifier: Programming Language :: Python :: 3.7
|
| 20 |
-
Classifier: Programming Language :: Python :: 3.8
|
| 21 |
-
Classifier: Programming Language :: Python :: 3.9
|
| 22 |
-
Classifier: Programming Language :: Python :: 3.10
|
| 23 |
-
Classifier: Programming Language :: Python :: 3.11
|
| 24 |
-
Classifier: Programming Language :: Python :: 3.12
|
| 25 |
-
Classifier: Programming Language :: Python :: 3.13
|
| 26 |
-
Classifier: Programming Language :: Python :: 3 :: Only
|
| 27 |
-
Classifier: Programming Language :: Python :: Implementation :: CPython
|
| 28 |
-
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
| 29 |
-
Classifier: Topic :: Text Processing :: Linguistic
|
| 30 |
-
Classifier: Topic :: Utilities
|
| 31 |
-
Classifier: Typing :: Typed
|
| 32 |
-
Requires-Python: >=3.7
|
| 33 |
-
Description-Content-Type: text/markdown
|
| 34 |
-
License-File: LICENSE
|
| 35 |
-
Provides-Extra: unicode-backport
|
| 36 |
-
|
| 37 |
-
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
| 38 |
-
|
| 39 |
-
<p align="center">
|
| 40 |
-
<sup>The Real First Universal Charset Detector</sup><br>
|
| 41 |
-
<a href="https://pypi.org/project/charset-normalizer">
|
| 42 |
-
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
| 43 |
-
</a>
|
| 44 |
-
<a href="https://pepy.tech/project/charset-normalizer/">
|
| 45 |
-
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
| 46 |
-
</a>
|
| 47 |
-
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
| 48 |
-
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
| 49 |
-
</a>
|
| 50 |
-
</p>
|
| 51 |
-
<p align="center">
|
| 52 |
-
<sup><i>Featured Packages</i></sup><br>
|
| 53 |
-
<a href="https://github.com/jawah/niquests">
|
| 54 |
-
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Best_HTTP_Client-cyan">
|
| 55 |
-
</a>
|
| 56 |
-
<a href="https://github.com/jawah/wassima">
|
| 57 |
-
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
|
| 58 |
-
</a>
|
| 59 |
-
</p>
|
| 60 |
-
<p align="center">
|
| 61 |
-
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
| 62 |
-
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
| 63 |
-
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
| 64 |
-
</a>
|
| 65 |
-
</p>
|
| 66 |
-
|
| 67 |
-
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
| 68 |
-
> I'm trying to resolve the issue by taking a new approach.
|
| 69 |
-
> All IANA character set names for which the Python core library provides codecs are supported.
|
| 70 |
-
|
| 71 |
-
<p align="center">
|
| 72 |
-
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
| 73 |
-
</p>
|
| 74 |
-
|
| 75 |
-
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
| 76 |
-
|
| 77 |
-
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
| 78 |
-
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
| 79 |
-
| `Fast` | ❌ | ✅ | ✅ |
|
| 80 |
-
| `Universal**` | ❌ | ✅ | ❌ |
|
| 81 |
-
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
| 82 |
-
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
| 83 |
-
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
| 84 |
-
| `Native Python` | ✅ | ✅ | ❌ |
|
| 85 |
-
| `Detect spoken language` | ❌ | ✅ | N/A |
|
| 86 |
-
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
| 87 |
-
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
| 88 |
-
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
| 89 |
-
|
| 90 |
-
<p align="center">
|
| 91 |
-
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
| 92 |
-
</p>
|
| 93 |
-
|
| 94 |
-
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
| 95 |
-
|
| 96 |
-
## ⚡ Performance
|
| 97 |
-
|
| 98 |
-
This package offer better performance than its counterpart Chardet. Here are some numbers.
|
| 99 |
-
|
| 100 |
-
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
| 101 |
-
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
| 102 |
-
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
|
| 103 |
-
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
| 104 |
-
|
| 105 |
-
| Package | 99th percentile | 95th percentile | 50th percentile |
|
| 106 |
-
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
| 107 |
-
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
|
| 108 |
-
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
| 109 |
-
|
| 110 |
-
_updated as of december 2024 using CPython 3.12_
|
| 111 |
-
|
| 112 |
-
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
|
| 113 |
-
|
| 114 |
-
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
| 115 |
-
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
| 116 |
-
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
|
| 117 |
-
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
|
| 118 |
-
> (e.g. Supported Encoding) Challenge-them if you want.
|
| 119 |
-
|
| 120 |
-
## ✨ Installation
|
| 121 |
-
|
| 122 |
-
Using pip:
|
| 123 |
-
|
| 124 |
-
```sh
|
| 125 |
-
pip install charset-normalizer -U
|
| 126 |
-
```
|
| 127 |
-
|
| 128 |
-
## 🚀 Basic Usage
|
| 129 |
-
|
| 130 |
-
### CLI
|
| 131 |
-
This package comes with a CLI.
|
| 132 |
-
|
| 133 |
-
```
|
| 134 |
-
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
| 135 |
-
file [file ...]
|
| 136 |
-
|
| 137 |
-
The Real First Universal Charset Detector. Discover originating encoding used
|
| 138 |
-
on text file. Normalize text to unicode.
|
| 139 |
-
|
| 140 |
-
positional arguments:
|
| 141 |
-
files File(s) to be analysed
|
| 142 |
-
|
| 143 |
-
optional arguments:
|
| 144 |
-
-h, --help show this help message and exit
|
| 145 |
-
-v, --verbose Display complementary information about file if any.
|
| 146 |
-
Stdout will contain logs about the detection process.
|
| 147 |
-
-a, --with-alternative
|
| 148 |
-
Output complementary possibilities if any. Top-level
|
| 149 |
-
JSON WILL be a list.
|
| 150 |
-
-n, --normalize Permit to normalize input file. If not set, program
|
| 151 |
-
does not write anything.
|
| 152 |
-
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
| 153 |
-
JSON output.
|
| 154 |
-
-r, --replace Replace file when trying to normalize it instead of
|
| 155 |
-
creating a new one.
|
| 156 |
-
-f, --force Replace file without asking if you are sure, use this
|
| 157 |
-
flag with caution.
|
| 158 |
-
-t THRESHOLD, --threshold THRESHOLD
|
| 159 |
-
Define a custom maximum amount of chaos allowed in
|
| 160 |
-
decoded content. 0. <= chaos <= 1.
|
| 161 |
-
--version Show version information and exit.
|
| 162 |
-
```
|
| 163 |
-
|
| 164 |
-
```bash
|
| 165 |
-
normalizer ./data/sample.1.fr.srt
|
| 166 |
-
```
|
| 167 |
-
|
| 168 |
-
or
|
| 169 |
-
|
| 170 |
-
```bash
|
| 171 |
-
python -m charset_normalizer ./data/sample.1.fr.srt
|
| 172 |
-
```
|
| 173 |
-
|
| 174 |
-
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
|
| 175 |
-
|
| 176 |
-
```json
|
| 177 |
-
{
|
| 178 |
-
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
| 179 |
-
"encoding": "cp1252",
|
| 180 |
-
"encoding_aliases": [
|
| 181 |
-
"1252",
|
| 182 |
-
"windows_1252"
|
| 183 |
-
],
|
| 184 |
-
"alternative_encodings": [
|
| 185 |
-
"cp1254",
|
| 186 |
-
"cp1256",
|
| 187 |
-
"cp1258",
|
| 188 |
-
"iso8859_14",
|
| 189 |
-
"iso8859_15",
|
| 190 |
-
"iso8859_16",
|
| 191 |
-
"iso8859_3",
|
| 192 |
-
"iso8859_9",
|
| 193 |
-
"latin_1",
|
| 194 |
-
"mbcs"
|
| 195 |
-
],
|
| 196 |
-
"language": "French",
|
| 197 |
-
"alphabets": [
|
| 198 |
-
"Basic Latin",
|
| 199 |
-
"Latin-1 Supplement"
|
| 200 |
-
],
|
| 201 |
-
"has_sig_or_bom": false,
|
| 202 |
-
"chaos": 0.149,
|
| 203 |
-
"coherence": 97.152,
|
| 204 |
-
"unicode_path": null,
|
| 205 |
-
"is_preferred": true
|
| 206 |
-
}
|
| 207 |
-
```
|
| 208 |
-
|
| 209 |
-
### Python
|
| 210 |
-
*Just print out normalized text*
|
| 211 |
-
```python
|
| 212 |
-
from charset_normalizer import from_path
|
| 213 |
-
|
| 214 |
-
results = from_path('./my_subtitle.srt')
|
| 215 |
-
|
| 216 |
-
print(str(results.best()))
|
| 217 |
-
```
|
| 218 |
-
|
| 219 |
-
*Upgrade your code without effort*
|
| 220 |
-
```python
|
| 221 |
-
from charset_normalizer import detect
|
| 222 |
-
```
|
| 223 |
-
|
| 224 |
-
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
| 225 |
-
|
| 226 |
-
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
| 227 |
-
|
| 228 |
-
## 😇 Why
|
| 229 |
-
|
| 230 |
-
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
| 231 |
-
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
| 232 |
-
|
| 233 |
-
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
| 234 |
-
produce **two identical rendered strings.**
|
| 235 |
-
What I want is to get readable text, the best I can.
|
| 236 |
-
|
| 237 |
-
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
| 238 |
-
|
| 239 |
-
Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
|
| 240 |
-
|
| 241 |
-
## 🍰 How
|
| 242 |
-
|
| 243 |
-
- Discard all charset encoding tables that could not fit the binary content.
|
| 244 |
-
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
| 245 |
-
- Extract matches with the lowest mess detected.
|
| 246 |
-
- Additionally, we measure coherence / probe for a language.
|
| 247 |
-
|
| 248 |
-
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
| 249 |
-
|
| 250 |
-
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
| 251 |
-
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
|
| 252 |
-
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
| 253 |
-
improve or rewrite it.
|
| 254 |
-
|
| 255 |
-
*Coherence :* For each language on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
| 256 |
-
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
| 257 |
-
|
| 258 |
-
## ⚡ Known limitations
|
| 259 |
-
|
| 260 |
-
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
| 261 |
-
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
|
| 262 |
-
|
| 263 |
-
## ⚠️ About Python EOLs
|
| 264 |
-
|
| 265 |
-
**If you are running:**
|
| 266 |
-
|
| 267 |
-
- Python >=2.7,<3.5: Unsupported
|
| 268 |
-
- Python 3.5: charset-normalizer < 2.1
|
| 269 |
-
- Python 3.6: charset-normalizer < 3.1
|
| 270 |
-
- Python 3.7: charset-normalizer < 4.0
|
| 271 |
-
|
| 272 |
-
Upgrade your Python interpreter as soon as possible.
|
| 273 |
-
|
| 274 |
-
## 👤 Contributing
|
| 275 |
-
|
| 276 |
-
Contributions, issues and feature requests are very much welcome.<br />
|
| 277 |
-
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
| 278 |
-
|
| 279 |
-
## 📝 License
|
| 280 |
-
|
| 281 |
-
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
| 282 |
-
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
| 283 |
-
|
| 284 |
-
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
| 285 |
-
|
| 286 |
-
## 💼 For Enterprise
|
| 287 |
-
|
| 288 |
-
Professional support for charset-normalizer is available as part of the [Tidelift
|
| 289 |
-
Subscription][1]. Tidelift gives software development teams a single source for
|
| 290 |
-
purchasing and maintaining their software, with professional grade assurances
|
| 291 |
-
from the experts who know it best, while seamlessly integrating with existing
|
| 292 |
-
tools.
|
| 293 |
-
|
| 294 |
-
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
| 295 |
-
|
| 296 |
-
[](https://www.bestpractices.dev/projects/7297)
|
| 297 |
-
|
| 298 |
-
# Changelog
|
| 299 |
-
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
| 300 |
-
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
| 301 |
-
|
| 302 |
-
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
|
| 303 |
-
|
| 304 |
-
### Changed
|
| 305 |
-
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
|
| 306 |
-
- Enforce annotation delayed loading for a simpler and consistent types in the project.
|
| 307 |
-
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
|
| 308 |
-
|
| 309 |
-
### Added
|
| 310 |
-
- pre-commit configuration.
|
| 311 |
-
- noxfile.
|
| 312 |
-
|
| 313 |
-
### Removed
|
| 314 |
-
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
|
| 315 |
-
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
|
| 316 |
-
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
|
| 317 |
-
- Unused `utils.range_scan` function.
|
| 318 |
-
|
| 319 |
-
### Fixed
|
| 320 |
-
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
|
| 321 |
-
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
|
| 322 |
-
|
| 323 |
-
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
|
| 324 |
-
|
| 325 |
-
### Added
|
| 326 |
-
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
|
| 327 |
-
- Support for Python 3.13 (#512)
|
| 328 |
-
|
| 329 |
-
### Fixed
|
| 330 |
-
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
|
| 331 |
-
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
|
| 332 |
-
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
|
| 333 |
-
|
| 334 |
-
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
|
| 335 |
-
|
| 336 |
-
### Fixed
|
| 337 |
-
- Unintentional memory usage regression when using large payload that match several encoding (#376)
|
| 338 |
-
- Regression on some detection case showcased in the documentation (#371)
|
| 339 |
-
|
| 340 |
-
### Added
|
| 341 |
-
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
|
| 342 |
-
|
| 343 |
-
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
|
| 344 |
-
|
| 345 |
-
### Changed
|
| 346 |
-
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
|
| 347 |
-
- Improved the general detection reliability based on reports from the community
|
| 348 |
-
|
| 349 |
-
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
|
| 350 |
-
|
| 351 |
-
### Added
|
| 352 |
-
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
|
| 353 |
-
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
|
| 354 |
-
|
| 355 |
-
### Removed
|
| 356 |
-
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
|
| 357 |
-
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
|
| 358 |
-
|
| 359 |
-
### Changed
|
| 360 |
-
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
|
| 361 |
-
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
|
| 362 |
-
|
| 363 |
-
### Fixed
|
| 364 |
-
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
|
| 365 |
-
|
| 366 |
-
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
|
| 367 |
-
|
| 368 |
-
### Changed
|
| 369 |
-
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
|
| 370 |
-
- Minor improvement over the global detection reliability
|
| 371 |
-
|
| 372 |
-
### Added
|
| 373 |
-
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
|
| 374 |
-
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
|
| 375 |
-
- Explicit support for Python 3.12
|
| 376 |
-
|
| 377 |
-
### Fixed
|
| 378 |
-
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
|
| 379 |
-
|
| 380 |
-
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
|
| 381 |
-
|
| 382 |
-
### Added
|
| 383 |
-
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
|
| 384 |
-
|
| 385 |
-
### Removed
|
| 386 |
-
- Support for Python 3.6 (PR #260)
|
| 387 |
-
|
| 388 |
-
### Changed
|
| 389 |
-
- Optional speedup provided by mypy/c 1.0.1
|
| 390 |
-
|
| 391 |
-
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
|
| 392 |
-
|
| 393 |
-
### Fixed
|
| 394 |
-
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
|
| 395 |
-
|
| 396 |
-
### Changed
|
| 397 |
-
- Speedup provided by mypy/c 0.990 on Python >= 3.7
|
| 398 |
-
|
| 399 |
-
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
|
| 400 |
-
|
| 401 |
-
### Added
|
| 402 |
-
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
| 403 |
-
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
| 404 |
-
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
| 405 |
-
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
| 406 |
-
|
| 407 |
-
### Changed
|
| 408 |
-
- Build with static metadata using 'build' frontend
|
| 409 |
-
- Make the language detection stricter
|
| 410 |
-
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
| 411 |
-
|
| 412 |
-
### Fixed
|
| 413 |
-
- CLI with opt --normalize fail when using full path for files
|
| 414 |
-
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
| 415 |
-
- Sphinx warnings when generating the documentation
|
| 416 |
-
|
| 417 |
-
### Removed
|
| 418 |
-
- Coherence detector no longer return 'Simple English' instead return 'English'
|
| 419 |
-
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
| 420 |
-
- Breaking: Method `first()` and `best()` from CharsetMatch
|
| 421 |
-
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
| 422 |
-
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
| 423 |
-
- Breaking: Top-level function `normalize`
|
| 424 |
-
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
| 425 |
-
- Support for the backport `unicodedata2`
|
| 426 |
-
|
| 427 |
-
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
|
| 428 |
-
|
| 429 |
-
### Added
|
| 430 |
-
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
| 431 |
-
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
| 432 |
-
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
| 433 |
-
|
| 434 |
-
### Changed
|
| 435 |
-
- Build with static metadata using 'build' frontend
|
| 436 |
-
- Make the language detection stricter
|
| 437 |
-
|
| 438 |
-
### Fixed
|
| 439 |
-
- CLI with opt --normalize fail when using full path for files
|
| 440 |
-
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
| 441 |
-
|
| 442 |
-
### Removed
|
| 443 |
-
- Coherence detector no longer return 'Simple English' instead return 'English'
|
| 444 |
-
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
| 445 |
-
|
| 446 |
-
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
|
| 447 |
-
|
| 448 |
-
### Added
|
| 449 |
-
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
| 450 |
-
|
| 451 |
-
### Removed
|
| 452 |
-
- Breaking: Method `first()` and `best()` from CharsetMatch
|
| 453 |
-
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
| 454 |
-
|
| 455 |
-
### Fixed
|
| 456 |
-
- Sphinx warnings when generating the documentation
|
| 457 |
-
|
| 458 |
-
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
|
| 459 |
-
|
| 460 |
-
### Changed
|
| 461 |
-
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
| 462 |
-
|
| 463 |
-
### Removed
|
| 464 |
-
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
| 465 |
-
- Breaking: Top-level function `normalize`
|
| 466 |
-
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
| 467 |
-
- Support for the backport `unicodedata2`
|
| 468 |
-
|
| 469 |
-
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
|
| 470 |
-
|
| 471 |
-
### Deprecated
|
| 472 |
-
- Function `normalize` scheduled for removal in 3.0
|
| 473 |
-
|
| 474 |
-
### Changed
|
| 475 |
-
- Removed useless call to decode in fn is_unprintable (#206)
|
| 476 |
-
|
| 477 |
-
### Fixed
|
| 478 |
-
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
| 479 |
-
|
| 480 |
-
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
| 481 |
-
|
| 482 |
-
### Added
|
| 483 |
-
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
| 484 |
-
|
| 485 |
-
### Changed
|
| 486 |
-
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
| 487 |
-
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
| 488 |
-
|
| 489 |
-
### Fixed
|
| 490 |
-
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
| 491 |
-
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
| 492 |
-
|
| 493 |
-
### Removed
|
| 494 |
-
- Support for Python 3.5 (PR #192)
|
| 495 |
-
|
| 496 |
-
### Deprecated
|
| 497 |
-
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
| 498 |
-
|
| 499 |
-
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
| 500 |
-
|
| 501 |
-
### Fixed
|
| 502 |
-
- ASCII mis-detection in rare cases (PR #170)
|
| 503 |
-
|
| 504 |
-
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
| 505 |
-
|
| 506 |
-
### Added
|
| 507 |
-
- Explicit support for Python 3.11 (PR #164)
|
| 508 |
-
|
| 509 |
-
### Changed
|
| 510 |
-
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
| 511 |
-
|
| 512 |
-
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
| 513 |
-
|
| 514 |
-
### Fixed
|
| 515 |
-
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
| 516 |
-
|
| 517 |
-
### Changed
|
| 518 |
-
- Skipping the language-detection (CD) on ASCII (PR #155)
|
| 519 |
-
|
| 520 |
-
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
| 521 |
-
|
| 522 |
-
### Changed
|
| 523 |
-
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
| 524 |
-
|
| 525 |
-
### Fixed
|
| 526 |
-
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
| 527 |
-
|
| 528 |
-
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
| 529 |
-
### Changed
|
| 530 |
-
- Improvement over Vietnamese detection (PR #126)
|
| 531 |
-
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
| 532 |
-
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
| 533 |
-
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
| 534 |
-
- Code style as refactored by Sourcery-AI (PR #131)
|
| 535 |
-
- Minor adjustment on the MD around european words (PR #133)
|
| 536 |
-
- Remove and replace SRTs from assets / tests (PR #139)
|
| 537 |
-
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
| 538 |
-
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
| 539 |
-
|
| 540 |
-
### Fixed
|
| 541 |
-
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
| 542 |
-
- Avoid using too insignificant chunk (PR #137)
|
| 543 |
-
|
| 544 |
-
### Added
|
| 545 |
-
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
| 546 |
-
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
|
| 547 |
-
|
| 548 |
-
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
|
| 549 |
-
### Added
|
| 550 |
-
- Add support for Kazakh (Cyrillic) language detection (PR #109)
|
| 551 |
-
|
| 552 |
-
### Changed
|
| 553 |
-
- Further, improve inferring the language from a given single-byte code page (PR #112)
|
| 554 |
-
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
|
| 555 |
-
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
|
| 556 |
-
- Various detection improvement (MD+CD) (PR #117)
|
| 557 |
-
|
| 558 |
-
### Removed
|
| 559 |
-
- Remove redundant logging entry about detected language(s) (PR #115)
|
| 560 |
-
|
| 561 |
-
### Fixed
|
| 562 |
-
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
|
| 563 |
-
|
| 564 |
-
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
|
| 565 |
-
### Fixed
|
| 566 |
-
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
|
| 567 |
-
- Fix CLI crash when using --minimal output in certain cases (PR #103)
|
| 568 |
-
|
| 569 |
-
### Changed
|
| 570 |
-
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
|
| 571 |
-
|
| 572 |
-
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
|
| 573 |
-
### Changed
|
| 574 |
-
- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
|
| 575 |
-
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
|
| 576 |
-
- The Unicode detection is slightly improved (PR #93)
|
| 577 |
-
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
|
| 578 |
-
|
| 579 |
-
### Removed
|
| 580 |
-
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
|
| 581 |
-
|
| 582 |
-
### Fixed
|
| 583 |
-
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
|
| 584 |
-
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
|
| 585 |
-
- The MANIFEST.in was not exhaustive (PR #78)
|
| 586 |
-
|
| 587 |
-
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
|
| 588 |
-
### Fixed
|
| 589 |
-
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
|
| 590 |
-
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
|
| 591 |
-
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
|
| 592 |
-
- Submatch factoring could be wrong in rare edge cases (PR #72)
|
| 593 |
-
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
|
| 594 |
-
- Fix line endings from CRLF to LF for certain project files (PR #67)
|
| 595 |
-
|
| 596 |
-
### Changed
|
| 597 |
-
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
|
| 598 |
-
- Allow fallback on specified encoding if any (PR #71)
|
| 599 |
-
|
| 600 |
-
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
|
| 601 |
-
### Changed
|
| 602 |
-
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
|
| 603 |
-
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
|
| 604 |
-
|
| 605 |
-
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
|
| 606 |
-
### Fixed
|
| 607 |
-
- Empty/Too small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
|
| 608 |
-
|
| 609 |
-
### Changed
|
| 610 |
-
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
|
| 611 |
-
|
| 612 |
-
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
|
| 613 |
-
### Fixed
|
| 614 |
-
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
|
| 615 |
-
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
|
| 616 |
-
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
|
| 617 |
-
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
|
| 618 |
-
|
| 619 |
-
### Changed
|
| 620 |
-
- Public function normalize default args values were not aligned with from_bytes (PR #53)
|
| 621 |
-
|
| 622 |
-
### Added
|
| 623 |
-
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
|
| 624 |
-
|
| 625 |
-
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
|
| 626 |
-
### Changed
|
| 627 |
-
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
|
| 628 |
-
- Accent has been made on UTF-8 detection, should perform rather instantaneous.
|
| 629 |
-
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
|
| 630 |
-
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
|
| 631 |
-
- The program has been rewritten to ease the readability and maintainability. (+ Using static typing)
|
| 632 |
-
- utf_7 detection has been reinstated.
|
| 633 |
-
|
| 634 |
-
### Removed
|
| 635 |
-
- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
|
| 636 |
-
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
|
| 637 |
-
- The exception hook on UnicodeDecodeError has been removed.
|
| 638 |
-
|
| 639 |
-
### Deprecated
|
| 640 |
-
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
|
| 641 |
-
|
| 642 |
-
### Fixed
|
| 643 |
-
- The CLI output used the relative path of the file(s). Should be absolute.
|
| 644 |
-
|
| 645 |
-
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
|
| 646 |
-
### Fixed
|
| 647 |
-
- Logger configuration/usage no longer conflict with others (PR #44)
|
| 648 |
-
|
| 649 |
-
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
|
| 650 |
-
### Removed
|
| 651 |
-
- Using standard logging instead of using the package loguru.
|
| 652 |
-
- Dropping nose test framework in favor of the maintained pytest.
|
| 653 |
-
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
|
| 654 |
-
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
|
| 655 |
-
- Stop support for UTF-7 that does not contain a SIG.
|
| 656 |
-
- Dropping PrettyTable, replaced with pure JSON output in CLI.
|
| 657 |
-
|
| 658 |
-
### Fixed
|
| 659 |
-
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
|
| 660 |
-
- Not searching properly for the BOM when trying utf32/16 parent codec.
|
| 661 |
-
|
| 662 |
-
### Changed
|
| 663 |
-
- Improving the package final size by compressing frequencies.json.
|
| 664 |
-
- Huge improvement on the largest payloads.
|
| 665 |
-
|
| 666 |
-
### Added
|
| 667 |
-
- CLI now produces JSON consumable output.
|
| 668 |
-
- Return ASCII if given sequences fit. Given reasonable confidence.
|
| 669 |
-
|
| 670 |
-
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
|
| 671 |
-
|
| 672 |
-
### Fixed
|
| 673 |
-
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
|
| 674 |
-
|
| 675 |
-
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
|
| 676 |
-
|
| 677 |
-
### Fixed
|
| 678 |
-
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
|
| 679 |
-
|
| 680 |
-
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
|
| 681 |
-
|
| 682 |
-
### Fixed
|
| 683 |
-
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
|
| 684 |
-
|
| 685 |
-
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
|
| 686 |
-
|
| 687 |
-
### Changed
|
| 688 |
-
- Amend the previous release to allow prettytable 2.0 (PR #35)
|
| 689 |
-
|
| 690 |
-
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
|
| 691 |
-
|
| 692 |
-
### Fixed
|
| 693 |
-
- Fix error while using the package with a python pre-release interpreter (PR #33)
|
| 694 |
-
|
| 695 |
-
### Changed
|
| 696 |
-
- Dependencies refactoring, constraints revised.
|
| 697 |
-
|
| 698 |
-
### Added
|
| 699 |
-
- Add python 3.9 and 3.10 to the supported interpreters
|
| 700 |
-
|
| 701 |
-
MIT License
|
| 702 |
-
|
| 703 |
-
Copyright (c) 2025 TAHRI Ahmed R.
|
| 704 |
-
|
| 705 |
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 706 |
-
of this software and associated documentation files (the "Software"), to deal
|
| 707 |
-
in the Software without restriction, including without limitation the rights
|
| 708 |
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 709 |
-
copies of the Software, and to permit persons to whom the Software is
|
| 710 |
-
furnished to do so, subject to the following conditions:
|
| 711 |
-
|
| 712 |
-
The above copyright notice and this permission notice shall be included in all
|
| 713 |
-
copies or substantial portions of the Software.
|
| 714 |
-
|
| 715 |
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 716 |
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 717 |
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 718 |
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 719 |
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 720 |
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 721 |
-
SOFTWARE.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
../../Scripts/normalizer.exe,sha256=rjmQpEaKnhN2uxmQpPX5uvykP-ehMGTO6RzOXLN_RJY,108424
|
| 2 |
-
charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 3 |
-
charset_normalizer-3.4.1.dist-info/LICENSE,sha256=GFd0hdNwTxpHne2OVzwJds_tMV_S_ReYP6mI2kwvcNE,1092
|
| 4 |
-
charset_normalizer-3.4.1.dist-info/METADATA,sha256=0_fAC3DknimRZusm6kkP4ylPD0JVzBq5mKHWLNBJM6w,36034
|
| 5 |
-
charset_normalizer-3.4.1.dist-info/RECORD,,
|
| 6 |
-
charset_normalizer-3.4.1.dist-info/WHEEL,sha256=pWXrJbnZSH-J-PhYmKs2XNn4DHCPNBYq965vsBJBFvA,101
|
| 7 |
-
charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65
|
| 8 |
-
charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
|
| 9 |
-
charset_normalizer/__init__.py,sha256=0NT8MHi7SKq3juMqYfOdrkzjisK0L73lneNHH4qaUAs,1638
|
| 10 |
-
charset_normalizer/__main__.py,sha256=2sj_BS6H0sU25C1bMqz9DVwa6kOK9lchSEbSU-_iu7M,115
|
| 11 |
-
charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
|
| 12 |
-
charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
|
| 13 |
-
charset_normalizer/__pycache__/api.cpython-312.pyc,,
|
| 14 |
-
charset_normalizer/__pycache__/cd.cpython-312.pyc,,
|
| 15 |
-
charset_normalizer/__pycache__/constant.cpython-312.pyc,,
|
| 16 |
-
charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
|
| 17 |
-
charset_normalizer/__pycache__/md.cpython-312.pyc,,
|
| 18 |
-
charset_normalizer/__pycache__/models.cpython-312.pyc,,
|
| 19 |
-
charset_normalizer/__pycache__/utils.cpython-312.pyc,,
|
| 20 |
-
charset_normalizer/__pycache__/version.cpython-312.pyc,,
|
| 21 |
-
charset_normalizer/api.py,sha256=2a0p2Gnhbdo9O6C04CNxTSN23fIbgOF20nxb0pWPNFM,23285
|
| 22 |
-
charset_normalizer/cd.py,sha256=uq8nVxRpR6Guc16ACvOWtL8KO3w7vYaCh8hHisuOyTg,12917
|
| 23 |
-
charset_normalizer/cli/__init__.py,sha256=d9MUx-1V_qD3x9igIy4JT4oC5CU0yjulk7QyZWeRFhg,144
|
| 24 |
-
charset_normalizer/cli/__main__.py,sha256=lZ89qRWun7FRxX0qm1GhK-m0DH0i048yiMAX1mVIuRg,10731
|
| 25 |
-
charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
|
| 26 |
-
charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
|
| 27 |
-
charset_normalizer/constant.py,sha256=7OKYi28cJjZxIcX3lQCwfK9ijoOgaVEbERww7SqqNSY,42475
|
| 28 |
-
charset_normalizer/legacy.py,sha256=v8An1aAQHUu036UWOhyIaDGkirZ0t4hfNVlyje5KInU,2394
|
| 29 |
-
charset_normalizer/md.cp312-win_amd64.pyd,sha256=XBGy--IKda7c3iBfvw_dovocqb2RSucmVtxvtlG_3tA,10752
|
| 30 |
-
charset_normalizer/md.py,sha256=e452fhwIAguEUr3FJzG7QZvFgXI-dVLOh_M1ZUiFI6U,20666
|
| 31 |
-
charset_normalizer/md__mypyc.cp312-win_amd64.pyd,sha256=_-jWSji0BgBVvrIHbmabYQNMBF4-xTusdO5mu6P8JsA,125440
|
| 32 |
-
charset_normalizer/models.py,sha256=ZR2PE-fqf6dASZfqdE5Uhkmr0o1MciSdXOjuNqwkmvg,12754
|
| 33 |
-
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 34 |
-
charset_normalizer/utils.py,sha256=oH9Q3WcAMwmsSB7uM8uDozz9DXnkYecbkTNbdnMbgzI,12410
|
| 35 |
-
charset_normalizer/version.py,sha256=7_thI7FzRQxEsbtUYwrJs3FCFWF666mw74H8mggPRR0,123
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
Wheel-Version: 1.0
|
| 2 |
-
Generator: setuptools (75.6.0)
|
| 3 |
-
Root-Is-Purelib: false
|
| 4 |
-
Tag: cp312-cp312-win_amd64
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
[console_scripts]
|
| 2 |
-
normalizer = charset_normalizer:cli.cli_detect
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
charset_normalizer
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer/__init__.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Charset-Normalizer
|
| 3 |
-
~~~~~~~~~~~~~~
|
| 4 |
-
The Real First Universal Charset Detector.
|
| 5 |
-
A library that helps you read text from an unknown charset encoding.
|
| 6 |
-
Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
|
| 7 |
-
All IANA character set names for which the Python core library provides codecs are supported.
|
| 8 |
-
|
| 9 |
-
Basic usage:
|
| 10 |
-
>>> from charset_normalizer import from_bytes
|
| 11 |
-
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
|
| 12 |
-
>>> best_guess = results.best()
|
| 13 |
-
>>> str(best_guess)
|
| 14 |
-
'Bсеки човек има право на образование. Oбразованието!'
|
| 15 |
-
|
| 16 |
-
Others methods and usages are available - see the full documentation
|
| 17 |
-
at <https://github.com/Ousret/charset_normalizer>.
|
| 18 |
-
:copyright: (c) 2021 by Ahmed TAHRI
|
| 19 |
-
:license: MIT, see LICENSE for more details.
|
| 20 |
-
"""
|
| 21 |
-
|
| 22 |
-
from __future__ import annotations
|
| 23 |
-
|
| 24 |
-
import logging
|
| 25 |
-
|
| 26 |
-
from .api import from_bytes, from_fp, from_path, is_binary
|
| 27 |
-
from .legacy import detect
|
| 28 |
-
from .models import CharsetMatch, CharsetMatches
|
| 29 |
-
from .utils import set_logging_handler
|
| 30 |
-
from .version import VERSION, __version__
|
| 31 |
-
|
| 32 |
-
__all__ = (
|
| 33 |
-
"from_fp",
|
| 34 |
-
"from_path",
|
| 35 |
-
"from_bytes",
|
| 36 |
-
"is_binary",
|
| 37 |
-
"detect",
|
| 38 |
-
"CharsetMatch",
|
| 39 |
-
"CharsetMatches",
|
| 40 |
-
"__version__",
|
| 41 |
-
"VERSION",
|
| 42 |
-
"set_logging_handler",
|
| 43 |
-
)
|
| 44 |
-
|
| 45 |
-
# Attach a NullHandler to the top level logger by default
|
| 46 |
-
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
|
| 47 |
-
|
| 48 |
-
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf_env/Lib/site-packages/charset_normalizer/__main__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from .cli import cli_detect
|
| 4 |
-
|
| 5 |
-
if __name__ == "__main__":
|
| 6 |
-
cli_detect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|