test
Browse files- .env +6 -5
- __pycache__/config.cpython-312.pyc +0 -0
- app.py +6 -1
- app/__init__.py +26 -12
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/models/__pycache__/facebook_ad.cpython-312.pyc +0 -0
- app/models/facebook_ad.py +113 -0
- app/routes/__pycache__/facebook_ads.cpython-312.pyc +0 -0
- app/routes/facebook_ads.py +310 -0
- app/services/__pycache__/ai_processor.cpython-312.pyc +0 -0
- app/services/__pycache__/facebook_scraper.cpython-312.pyc +0 -0
- app/services/ai_processor.py +109 -69
- app/services/facebook_scraper.py +417 -0
- app/templates/base.html +10 -2
- app/templates/facebook_ads/ad_analysis.html +123 -0
- app/templates/facebook_ads/ad_detail.html +129 -0
- app/templates/facebook_ads/advertiser_detail.html +141 -0
- app/templates/facebook_ads/advertisers.html +43 -0
- app/templates/facebook_ads/index.html +53 -0
- app/templates/facebook_ads/page_search.html +52 -0
- app/templates/facebook_ads/results.html +87 -0
- app/templates/facebook_ads/search.html +63 -0
- app/templates/login.html +24 -11
- celery.db +0 -0
- config.py +6 -3
- migrations/README +1 -0
- migrations/__pycache__/env.cpython-312.pyc +0 -0
- migrations/alembic.ini +50 -0
- migrations/env.py +113 -0
- migrations/script.py.mako +24 -0
- migrations/versions/__pycache__/dddcd665398d_add_facebook_ad_table.cpython-312.pyc +0 -0
- migrations/versions/dddcd665398d_add_facebook_ad_table.py +60 -0
- requirements.txt +21 -22
.env
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
FLASK_APP=app.py
|
| 2 |
FLASK_ENV=development
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
INSTANCE_PATH=/tmp/instance
|
| 9 |
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
|
|
|
|
| 1 |
FLASK_APP=app.py
|
| 2 |
FLASK_ENV=development
|
| 3 |
+
FLASK_DEBUG=1
|
| 4 |
+
SECRET_KEY=dev-secret-key-change-in-production
|
| 5 |
+
DATABASE_URL=sqlite:///app.db
|
| 6 |
+
CELERY_BROKER_URL=sqla+sqlite:///celery.db
|
| 7 |
+
CELERY_RESULT_BACKEND=db+sqlite:///celery-results.db
|
| 8 |
+
OPENAI_API_KEY=sk-mock-key-for-development
|
| 9 |
INSTANCE_PATH=/tmp/instance
|
| 10 |
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
|
__pycache__/config.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/config.cpython-312.pyc and b/__pycache__/config.cpython-312.pyc differ
|
|
|
app.py
CHANGED
|
@@ -2,6 +2,11 @@ from flask import Flask
|
|
| 2 |
from flask_migrate import Migrate
|
| 3 |
from app import db, create_app
|
| 4 |
from config import get_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
migrate = Migrate()
|
| 7 |
|
|
@@ -10,6 +15,6 @@ def create_flask_app():
|
|
| 10 |
migrate.init_app(app, db)
|
| 11 |
return app
|
| 12 |
|
| 13 |
-
if __name__ ==
|
| 14 |
app = create_flask_app()
|
| 15 |
app.run(debug=True)
|
|
|
|
| 2 |
from flask_migrate import Migrate
|
| 3 |
from app import db, create_app
|
| 4 |
from config import get_config
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Load environment variables from .env file
|
| 9 |
+
load_dotenv()
|
| 10 |
|
| 11 |
migrate = Migrate()
|
| 12 |
|
|
|
|
| 15 |
migrate.init_app(app, db)
|
| 16 |
return app
|
| 17 |
|
| 18 |
+
if __name__ == '__main__':
|
| 19 |
app = create_flask_app()
|
| 20 |
app.run(debug=True)
|
app/__init__.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
from flask import Flask
|
| 2 |
from flask_sqlalchemy import SQLAlchemy
|
|
|
|
| 3 |
from flask_login import LoginManager
|
| 4 |
from celery import Celery
|
|
|
|
| 5 |
import redis
|
| 6 |
import os
|
| 7 |
import logging
|
|
@@ -18,9 +20,10 @@ logger = logging.getLogger(__name__)
|
|
| 18 |
|
| 19 |
# Initialize extensions
|
| 20 |
db = SQLAlchemy()
|
|
|
|
| 21 |
login = LoginManager()
|
| 22 |
login.login_view = 'auth.login'
|
| 23 |
-
celery = Celery(__name__)
|
| 24 |
cache = None # Initialize later when app context is available
|
| 25 |
|
| 26 |
# Set up user loader for Flask-Login
|
|
@@ -29,7 +32,7 @@ def load_user(user_id):
|
|
| 29 |
from .models import User
|
| 30 |
return User.query.get(int(user_id))
|
| 31 |
|
| 32 |
-
def create_app(config_class=
|
| 33 |
logger.info("Starting application initialization...")
|
| 34 |
|
| 35 |
# Create the Flask app
|
|
@@ -37,12 +40,8 @@ def create_app(config_class=None):
|
|
| 37 |
logger.info("Flask app created")
|
| 38 |
|
| 39 |
# Load configuration
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
app.config.from_object('config.Config')
|
| 43 |
-
else:
|
| 44 |
-
logger.info(f"Loading configuration from {config_class}...")
|
| 45 |
-
app.config.from_object(config_class)
|
| 46 |
|
| 47 |
# Ensure instance path exists
|
| 48 |
logger.info(f"Ensuring instance path exists: {app.config['INSTANCE_PATH']}")
|
|
@@ -52,6 +51,7 @@ def create_app(config_class=None):
|
|
| 52 |
# Initialize extensions
|
| 53 |
logger.info("Initializing SQLAlchemy...")
|
| 54 |
db.init_app(app)
|
|
|
|
| 55 |
|
| 56 |
logger.info("Initializing LoginManager...")
|
| 57 |
login.init_app(app)
|
|
@@ -64,12 +64,18 @@ def create_app(config_class=None):
|
|
| 64 |
try:
|
| 65 |
logger.info(f"Initializing Redis cache with URL: {app.config['CELERY_BROKER_URL']}")
|
| 66 |
global cache
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
except Exception as e:
|
| 70 |
logger.error(f"Error initializing Redis cache: {e}")
|
| 71 |
-
# Continue without Redis
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
# Register test blueprint first (this should always work)
|
| 75 |
logger.info("Registering test blueprint...")
|
|
@@ -124,6 +130,14 @@ def create_app(config_class=None):
|
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Error registering auth blueprint: {e}")
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
# Add a simple route directly to the app
|
| 128 |
@app.route('/')
|
| 129 |
def index():
|
|
|
|
| 1 |
from flask import Flask
|
| 2 |
from flask_sqlalchemy import SQLAlchemy
|
| 3 |
+
from flask_migrate import Migrate
|
| 4 |
from flask_login import LoginManager
|
| 5 |
from celery import Celery
|
| 6 |
+
from config import Config
|
| 7 |
import redis
|
| 8 |
import os
|
| 9 |
import logging
|
|
|
|
| 20 |
|
| 21 |
# Initialize extensions
|
| 22 |
db = SQLAlchemy()
|
| 23 |
+
migrate = Migrate()
|
| 24 |
login = LoginManager()
|
| 25 |
login.login_view = 'auth.login'
|
| 26 |
+
celery = Celery(__name__, broker=Config.CELERY_BROKER_URL)
|
| 27 |
cache = None # Initialize later when app context is available
|
| 28 |
|
| 29 |
# Set up user loader for Flask-Login
|
|
|
|
| 32 |
from .models import User
|
| 33 |
return User.query.get(int(user_id))
|
| 34 |
|
| 35 |
+
def create_app(config_class=Config):
|
| 36 |
logger.info("Starting application initialization...")
|
| 37 |
|
| 38 |
# Create the Flask app
|
|
|
|
| 40 |
logger.info("Flask app created")
|
| 41 |
|
| 42 |
# Load configuration
|
| 43 |
+
logger.info(f"Loading configuration from {config_class}...")
|
| 44 |
+
app.config.from_object(config_class)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Ensure instance path exists
|
| 47 |
logger.info(f"Ensuring instance path exists: {app.config['INSTANCE_PATH']}")
|
|
|
|
| 51 |
# Initialize extensions
|
| 52 |
logger.info("Initializing SQLAlchemy...")
|
| 53 |
db.init_app(app)
|
| 54 |
+
migrate.init_app(app, db)
|
| 55 |
|
| 56 |
logger.info("Initializing LoginManager...")
|
| 57 |
login.init_app(app)
|
|
|
|
| 64 |
try:
|
| 65 |
logger.info(f"Initializing Redis cache with URL: {app.config['CELERY_BROKER_URL']}")
|
| 66 |
global cache
|
| 67 |
+
# Only try to connect to Redis if the URL starts with 'redis://'
|
| 68 |
+
if app.config['CELERY_BROKER_URL'].startswith('redis://'):
|
| 69 |
+
cache = redis.Redis.from_url(app.config['CELERY_BROKER_URL'])
|
| 70 |
+
logger.info("Redis cache initialized successfully")
|
| 71 |
+
else:
|
| 72 |
+
logger.info("Not using Redis cache as broker URL is not Redis")
|
| 73 |
+
cache = None
|
| 74 |
except Exception as e:
|
| 75 |
logger.error(f"Error initializing Redis cache: {e}")
|
| 76 |
+
# Continue without Redis
|
| 77 |
+
cache = None
|
| 78 |
+
logger.info("Continuing without Redis cache")
|
| 79 |
|
| 80 |
# Register test blueprint first (this should always work)
|
| 81 |
logger.info("Registering test blueprint...")
|
|
|
|
| 130 |
except Exception as e:
|
| 131 |
logger.error(f"Error registering auth blueprint: {e}")
|
| 132 |
|
| 133 |
+
try:
|
| 134 |
+
logger.info("Importing and registering Facebook Ads blueprint...")
|
| 135 |
+
from .routes.facebook_ads import facebook_ads_bp
|
| 136 |
+
app.register_blueprint(facebook_ads_bp)
|
| 137 |
+
logger.info("Facebook Ads blueprint registered successfully")
|
| 138 |
+
except Exception as e:
|
| 139 |
+
logger.error(f"Error registering Facebook Ads blueprint: {e}")
|
| 140 |
+
|
| 141 |
# Add a simple route directly to the app
|
| 142 |
@app.route('/')
|
| 143 |
def index():
|
app/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/app/__pycache__/__init__.cpython-312.pyc and b/app/__pycache__/__init__.cpython-312.pyc differ
|
|
|
app/models/__pycache__/facebook_ad.cpython-312.pyc
ADDED
|
Binary file (5.78 kB). View file
|
|
|
app/models/facebook_ad.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app import db
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
import uuid
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
class FacebookAd(db.Model):
    """Model for storing Facebook Ads data.

    Each row holds one scraped ad: identifying fields, its text content,
    JSON lists of image/link URLs, the search that produced it, optional
    analysis results, and the remaining raw scraper payload.
    """
    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Facebook-specific fields
    ad_id = db.Column(db.String(255), nullable=True, index=True)
    advertiser = db.Column(db.String(255), nullable=True, index=True)
    advertiser_id = db.Column(db.String(255), nullable=True, index=True)

    # Content fields
    content = db.Column(db.Text, nullable=True)
    images = db.Column(db.JSON, nullable=True)  # URLs to images
    links = db.Column(db.JSON, nullable=True)  # URLs in the ad

    # Search metadata
    search_query = db.Column(db.String(255), nullable=True, index=True)
    position = db.Column(db.Integer, nullable=True)

    # Analysis results
    sentiment = db.Column(db.JSON, nullable=True)
    topics = db.Column(db.JSON, nullable=True)
    entities = db.Column(db.JSON, nullable=True)

    # Raw data for future processing
    raw_data = db.Column(db.JSON, nullable=True)
    raw_text = db.Column(db.Text, nullable=True)

    # Timestamps (datetime.utcnow yields naive datetimes expressed in UTC)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # User association
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)

    def __repr__(self):
        return f'<FacebookAd {self.id} - {self.advertiser}>'

    @classmethod
    def from_scraper_data(cls, ad_data, user_id=None):
        """Create a FacebookAd instance from scraped data.

        Args:
            ad_data: Mapping produced by the scraper. Scalar fields are read
                with ``.get`` so missing keys become ``None``.
            user_id: Optional id of the user the ad is associated with.

        Returns:
            A new, unsaved ``FacebookAd``; the caller adds it to a session.
        """
        ad = cls(
            ad_id=ad_data.get('ad_id'),
            advertiser=ad_data.get('advertiser'),
            advertiser_id=ad_data.get('advertiser_id'),
            content=ad_data.get('content'),
            raw_text=ad_data.get('raw_text'),
            search_query=ad_data.get('search_query'),
            position=ad_data.get('position'),
            user_id=user_id,
        )

        # JSON fields are only set when the scraper supplied a non-empty value,
        # so an empty list is stored as NULL rather than "[]".
        images = ad_data.get('images')
        if images:
            ad.images = images

        links = ad_data.get('links')
        if links:
            ad.links = links

        # Keep everything else for future reference; images/links already
        # have their own columns.
        ad.raw_data = {k: v for k, v in ad_data.items() if k not in ('images', 'links')}

        return ad

    @staticmethod
    def _decode_json_value(value):
        """Return ``value`` as decoded JSON, or ``[]`` when empty or undecodable.

        Non-string values are returned as-is; strings are treated as
        JSON-encoded text.
        """
        if not value:
            return []

        if isinstance(value, str):
            try:
                return json.loads(value)
            except (TypeError, ValueError):
                # json.JSONDecodeError is a ValueError subclass. Catching only
                # decode failures keeps KeyboardInterrupt/SystemExit flowing.
                return []

        return value

    def get_image_urls(self):
        """Get list of image URLs from the ad."""
        return self._decode_json_value(self.images)

    def get_links(self):
        """Get list of links from the ad."""
        return self._decode_json_value(self.links)

    def to_dict(self):
        """Convert the ad to a dictionary for API responses.

        Timestamps are rendered with ``isoformat()`` (``None`` when unset);
        ``raw_data``, ``raw_text`` and ``user_id`` are not included.
        """
        return {
            'id': self.id,
            'ad_id': self.ad_id,
            'advertiser': self.advertiser,
            'advertiser_id': self.advertiser_id,
            'content': self.content,
            'images': self.get_image_urls(),
            'links': self.get_links(),
            'search_query': self.search_query,
            'position': self.position,
            'sentiment': self.sentiment,
            'topics': self.topics,
            'entities': self.entities,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }
|
app/routes/__pycache__/facebook_ads.cpython-312.pyc
ADDED
|
Binary file (16.5 kB). View file
|
|
|
app/routes/facebook_ads.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Blueprint, render_template, request, jsonify, current_app, flash, redirect, url_for
|
| 2 |
+
from flask_login import login_required, current_user
|
| 3 |
+
from app.services.facebook_scraper import FacebookScraper
|
| 4 |
+
from app.models.facebook_ad import FacebookAd
|
| 5 |
+
from app.services.ai_processor import AIPipeline
|
| 6 |
+
from app import db, celery
|
| 7 |
+
import logging
|
| 8 |
+
import json
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
facebook_ads_bp = Blueprint('facebook_ads', __name__, url_prefix='/facebook-ads')
|
| 13 |
+
|
| 14 |
+
@facebook_ads_bp.route('/', methods=['GET'])
@login_required
def index():
    """Render the Facebook Ads dashboard template for a logged-in user."""
    dashboard_template = 'facebook_ads/index.html'
    return render_template(dashboard_template)
|
| 19 |
+
|
| 20 |
+
@facebook_ads_bp.route('/search', methods=['GET', 'POST'])
@login_required
def search():
    """Search for Facebook ads.

    GET renders the search form. POST validates the form, queues the
    ``scrape_facebook_ads`` Celery task for the current user and re-renders
    the form with the queued task's id.
    """
    template = 'facebook_ads/search.html'

    if request.method != 'POST':
        return render_template(template)

    # Strip so a whitespace-only query is rejected like an empty one.
    search_query = request.form.get('search_query', '').strip()
    # type=int makes a non-numeric value fall back to the default instead of
    # raising ValueError and producing a 500 response.
    num_scrolls = request.form.get('num_scrolls', 5, type=int)
    country_code = request.form.get('country_code', 'ALL')

    if not search_query:
        flash('Please enter a search query', 'warning')
        return render_template(template)

    # Start the scraping task
    task = scrape_facebook_ads.delay(search_query, num_scrolls, country_code, current_user.id)

    flash(f'Started scraping Facebook ads for "{search_query}". This may take a few minutes.', 'info')
    return render_template(template, task_id=task.id)
|
| 40 |
+
|
| 41 |
+
@facebook_ads_bp.route('/page-search', methods=['GET', 'POST'])
@login_required
def page_search():
    """Search for ads from a specific Facebook page.

    GET renders the page-search form. POST validates the form, queues the
    ``scrape_facebook_page_ads`` Celery task for the current user and
    re-renders the form with the queued task's id.
    """
    template = 'facebook_ads/page_search.html'

    if request.method != 'POST':
        return render_template(template)

    # Strip so a whitespace-only page name is rejected like an empty one.
    page_name = request.form.get('page_name', '').strip()
    # type=int makes a non-numeric value fall back to the default instead of
    # raising ValueError and producing a 500 response.
    num_scrolls = request.form.get('num_scrolls', 5, type=int)

    if not page_name:
        flash('Please enter a page name', 'warning')
        return render_template(template)

    # Start the scraping task
    task = scrape_facebook_page_ads.delay(page_name, num_scrolls, current_user.id)

    flash(f'Started scraping Facebook ads for page "{page_name}". This may take a few minutes.', 'info')
    return render_template(template, task_id=task.id)
|
| 60 |
+
|
| 61 |
+
@facebook_ads_bp.route('/results', methods=['GET'])
@login_required
def results():
    """View Facebook ads results.

    Lists at most 100 stored ads, newest first. The optional ``query`` and
    ``advertiser`` URL parameters narrow the list with case-insensitive
    substring matches on ``search_query`` and ``advertiser`` respectively.
    """
    # NOTE: the 'type' URL parameter was previously read into an unused
    # local; it has no effect on the listing and is no longer read.
    query = request.args.get('query', '')
    advertiser = request.args.get('advertiser', '')

    # Build query
    ads_query = FacebookAd.query

    if query:
        ads_query = ads_query.filter(FacebookAd.search_query.ilike(f'%{query}%'))

    if advertiser:
        ads_query = ads_query.filter(FacebookAd.advertiser.ilike(f'%{advertiser}%'))

    # Get results
    ads = ads_query.order_by(FacebookAd.created_at.desc()).limit(100).all()

    return render_template('facebook_ads/results.html', ads=ads, query=query, advertiser=advertiser)
|
| 82 |
+
|
| 83 |
+
@facebook_ads_bp.route('/ad/<ad_id>', methods=['GET'])
@login_required
def view_ad(ad_id):
    """Show the detail page for one stored ad; responds 404 if it does not exist."""
    selected_ad = FacebookAd.query.get_or_404(ad_id)
    detail_template = 'facebook_ads/ad_detail.html'
    return render_template(detail_template, ad=selected_ad)
|
| 89 |
+
|
| 90 |
+
@facebook_ads_bp.route('/advertisers', methods=['GET'])
@login_required
def advertisers():
    """Show the 100 advertisers with the most stored ads, largest count first."""
    # One row per distinct advertiser together with how many ads it has.
    grouped = db.session.query(
        FacebookAd.advertiser,
        db.func.count(FacebookAd.id).label('ad_count')
    ).group_by(FacebookAd.advertiser)

    ranked = grouped.order_by(db.func.count(FacebookAd.id).desc())
    advertisers_data = ranked.limit(100).all()

    return render_template('facebook_ads/advertisers.html', advertisers=advertisers_data)
|
| 101 |
+
|
| 102 |
+
@facebook_ads_bp.route('/advertiser/<advertiser_name>', methods=['GET'])
@login_required
def advertiser_detail(advertiser_name):
    """Show every stored ad whose advertiser equals ``advertiser_name``, newest first."""
    matching = FacebookAd.query.filter(FacebookAd.advertiser == advertiser_name)
    ads = matching.order_by(FacebookAd.created_at.desc()).all()
    return render_template(
        'facebook_ads/advertiser_detail.html',
        advertiser=advertiser_name,
        ads=ads,
    )
|
| 108 |
+
|
| 109 |
+
@facebook_ads_bp.route('/analyze/<ad_id>', methods=['GET'])
@login_required
def analyze_ad(ad_id):
    """Analyze a specific Facebook ad.

    Responds 404 when the ad does not exist. When the ad has no sentiment
    or no topics stored, the ``analyze_facebook_ad`` Celery task is queued
    and the user is told to refresh; the analysis page is rendered either
    way with the ad's current data.
    """
    ad = FacebookAd.query.get_or_404(ad_id)

    # Start the analysis task if not already analyzed. The task handle is not
    # needed here, so it is not kept.
    if not ad.sentiment or not ad.topics:
        analyze_facebook_ad.delay(ad_id)
        flash('Started analyzing the ad. Refresh in a few moments to see results.', 'info')

    return render_template('facebook_ads/ad_analysis.html', ad=ad)
|
| 121 |
+
|
| 122 |
+
@facebook_ads_bp.route('/api/ads', methods=['GET'])
@login_required
def api_get_ads():
    """API endpoint to get Facebook Ads data.

    URL parameters:
        query: optional case-insensitive substring matched against the ad
            content or its search query.
        advertiser: optional case-insensitive substring of the advertiser.
        limit: maximum number of ads to return (default 50).

    Returns:
        A JSON array of ``FacebookAd.to_dict()`` objects, newest first.
    """
    default_limit = 50

    query = request.args.get('query', '')
    advertiser = request.args.get('advertiser', '')
    # type=int falls back to the default on non-numeric input instead of
    # raising ValueError (which surfaced as a 500 response).
    limit = request.args.get('limit', default_limit, type=int)
    if limit < 0:
        # A negative LIMIT is either an error or "no limit" depending on the
        # database backend; use the default instead.
        limit = default_limit

    # Build query
    ads_query = FacebookAd.query

    if query:
        pattern = f'%{query}%'
        ads_query = ads_query.filter(
            (FacebookAd.content.ilike(pattern)) |
            (FacebookAd.search_query.ilike(pattern))
        )

    if advertiser:
        ads_query = ads_query.filter(FacebookAd.advertiser.ilike(f'%{advertiser}%'))

    # Get results
    ads = ads_query.order_by(FacebookAd.created_at.desc()).limit(limit).all()

    # Convert to JSON
    return jsonify([ad.to_dict() for ad in ads])
|
| 149 |
+
|
| 150 |
+
@facebook_ads_bp.route('/api/advertisers', methods=['GET'])
@login_required
def api_get_advertisers():
    """API endpoint to get advertisers data.

    URL parameters:
        limit: maximum number of advertiser groups to fetch (default 50).

    Returns:
        A JSON array of ``{"name": ..., "ad_count": ...}`` objects ordered
        by descending ad count. Groups with an empty advertiser name are
        omitted after the limit is applied, so fewer than ``limit`` entries
        may be returned.
    """
    default_limit = 50

    # type=int falls back to the default on non-numeric input instead of
    # raising ValueError (which surfaced as a 500 response).
    limit = request.args.get('limit', default_limit, type=int)
    if limit < 0:
        # A negative LIMIT is either an error or "no limit" depending on the
        # database backend; use the default instead.
        limit = default_limit

    # Get unique advertisers and count their ads
    ad_count = db.func.count(FacebookAd.id)
    advertisers_data = db.session.query(
        FacebookAd.advertiser,
        ad_count.label('ad_count')
    ).group_by(FacebookAd.advertiser).order_by(ad_count.desc()).limit(limit).all()

    # Convert to JSON
    result = [{"name": name, "ad_count": count} for name, count in advertisers_data if name]

    return jsonify(result)
|
| 166 |
+
|
| 167 |
+
@celery.task
def scrape_facebook_ads(search_query, num_scrolls, country_code, user_id):
    """Celery task to scrape Facebook ads.

    Scrapes ads for ``search_query``, builds a ``FacebookAd`` for each
    result, attaches a sentiment value when the ad has content, and commits
    all ads in one transaction.

    Args:
        search_query: Text passed to ``FacebookScraper.scrape_ads``.
        num_scrolls: Scroll count passed to the scraper.
        country_code: Country filter passed to the scraper.
        user_id: Id of the user the stored ads are associated with.

    Returns:
        ``{'status': 'success', 'count': n}`` on success, otherwise
        ``{'status': 'error', 'message': ...}`` after rolling back.
    """
    try:
        logger.info(f"Starting Facebook ads scraping for query: {search_query}")

        # Initialize scraper
        scraper = FacebookScraper()

        # Scrape ads
        ads_data = scraper.scrape_ads(search_query, num_scrolls, country_code)

        logger.info(f"Scraped {len(ads_data)} Facebook ads")

        # Process and store ads
        ai_pipeline = AIPipeline()

        for ad_data in ads_data:
            # Create FacebookAd instance
            ad = FacebookAd.from_scraper_data(ad_data, user_id)

            # Process with AI if there's content
            if ad.content:
                try:
                    # Create a simple object with content for AI processing
                    ad_content = type('obj', (object,), {
                        'content': ad.content
                    })

                    # Process with AI
                    ai_results = ai_pipeline.process_ad(ad_content)
                    ad.sentiment = ai_results.get('sentiment')
                except Exception as e:
                    # Best effort: an AI failure must not prevent the ad from
                    # being stored. logger.exception keeps the traceback.
                    logger.exception("Error processing ad with AI: %s", e)

            # Save to database
            db.session.add(ad)

        db.session.commit()
        logger.info(f"Saved {len(ads_data)} Facebook ads to database")

        return {'status': 'success', 'count': len(ads_data)}

    except Exception as e:
        # Task boundary: record the full traceback, undo pending changes and
        # report the failure to the caller instead of raising.
        logger.exception("Error in Facebook ads scraping task: %s", e)
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}
|
| 214 |
+
|
| 215 |
+
@celery.task
def scrape_facebook_page_ads(page_name, num_scrolls, user_id):
    """Celery task to scrape ads from a specific Facebook page.

    Scrapes ads for ``page_name``, builds a ``FacebookAd`` for each result,
    attaches a sentiment value when the ad has content, and commits all ads
    in one transaction.

    Args:
        page_name: Page name passed to ``FacebookScraper.scrape_ads_by_page``.
        num_scrolls: Scroll count passed to the scraper.
        user_id: Id of the user the stored ads are associated with.

    Returns:
        ``{'status': 'success', 'count': n}`` on success, otherwise
        ``{'status': 'error', 'message': ...}`` after rolling back.
    """
    try:
        logger.info(f"Starting Facebook page ads scraping for page: {page_name}")

        # Initialize scraper
        scraper = FacebookScraper()

        # Scrape ads
        ads_data = scraper.scrape_ads_by_page(page_name, num_scrolls)

        logger.info(f"Scraped {len(ads_data)} Facebook ads from page {page_name}")

        # Process and store ads
        ai_pipeline = AIPipeline()

        for ad_data in ads_data:
            # Create FacebookAd instance
            ad = FacebookAd.from_scraper_data(ad_data, user_id)

            # Process with AI if there's content
            if ad.content:
                try:
                    # Create a simple object with content for AI processing
                    ad_content = type('obj', (object,), {
                        'content': ad.content
                    })

                    # Process with AI
                    ai_results = ai_pipeline.process_ad(ad_content)
                    ad.sentiment = ai_results.get('sentiment')
                except Exception as e:
                    # Best effort: an AI failure must not prevent the ad from
                    # being stored. logger.exception keeps the traceback.
                    logger.exception("Error processing ad with AI: %s", e)

            # Save to database
            db.session.add(ad)

        db.session.commit()
        logger.info(f"Saved {len(ads_data)} Facebook ads to database")

        return {'status': 'success', 'count': len(ads_data)}

    except Exception as e:
        # Task boundary: record the full traceback, undo pending changes and
        # report the failure to the caller instead of raising.
        logger.exception("Error in Facebook page ads scraping task: %s", e)
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}
|
| 262 |
+
|
| 263 |
+
@celery.task
def analyze_facebook_ad(ad_id):
    """Celery task to analyze a Facebook ad.

    Loads the ad, runs its content through ``AIPipeline.process_ad`` and
    stores the returned sentiment, topics and entities on the ad.

    Args:
        ad_id: Primary key of the ``FacebookAd`` to analyze.

    Returns:
        ``{'status': 'success', 'ad_id': ad_id}`` when results were saved;
        ``{'status': 'warning', 'message': ...}`` when the ad has no content;
        ``{'status': 'error', 'message': ...}`` when the ad is missing or any
        step fails.
    """
    try:
        logger.info(f"Starting analysis for Facebook ad: {ad_id}")

        # Get the ad
        ad = FacebookAd.query.get(ad_id)

        if not ad:
            logger.error(f"Ad not found: {ad_id}")
            return {'status': 'error', 'message': 'Ad not found'}

        # Check for content before building the pipeline so nothing is
        # initialized for an ad that has nothing to analyze.
        if not ad.content:
            logger.warning(f"No content to analyze for ad: {ad_id}")
            return {'status': 'warning', 'message': 'No content to analyze'}

        # Initialize AI pipeline
        ai_pipeline = AIPipeline()

        try:
            # Create a simple object with content for AI processing
            ad_content = type('obj', (object,), {
                'content': ad.content
            })

            # Process with AI
            ai_results = ai_pipeline.process_ad(ad_content)

            # Update ad with results
            ad.sentiment = ai_results.get('sentiment')
            ad.topics = ai_results.get('topics')
            ad.entities = ai_results.get('entities')

            # Save to database
            db.session.commit()
        except Exception as e:
            logger.exception("Error processing ad with AI: %s", e)
            # The ad may have been partially updated or the commit may have
            # failed; roll back so the session is usable for the next task.
            db.session.rollback()
            return {'status': 'error', 'message': str(e)}

        logger.info(f"Successfully analyzed Facebook ad: {ad_id}")
        return {'status': 'success', 'ad_id': ad_id}

    except Exception as e:
        # Task boundary: record the full traceback, undo pending changes and
        # report the failure to the caller instead of raising.
        logger.exception("Error in Facebook ad analysis task: %s", e)
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}
|
app/services/__pycache__/ai_processor.cpython-312.pyc
CHANGED
|
Binary files a/app/services/__pycache__/ai_processor.cpython-312.pyc and b/app/services/__pycache__/ai_processor.cpython-312.pyc differ
|
|
|
app/services/__pycache__/facebook_scraper.cpython-312.pyc
ADDED
|
Binary file (19.7 kB). View file
|
|
|
app/services/ai_processor.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
logger = logging.getLogger(__name__)
|
| 5 |
|
|
@@ -9,89 +14,124 @@ class ProcessingError(Exception):
|
|
| 9 |
|
| 10 |
class AIPipeline:
|
| 11 |
def __init__(self):
|
| 12 |
-
|
| 13 |
-
self.detector = None
|
| 14 |
try:
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
config_path = model_dir / "yolov4.cfg"
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
self._setup_detector(str(weights_path), str(config_path))
|
| 24 |
-
|
| 25 |
except Exception as e:
|
| 26 |
logger.error(f"Error initializing AI Pipeline: {e}")
|
| 27 |
raise
|
| 28 |
|
| 29 |
-
def
|
| 30 |
-
"""
|
| 31 |
-
try:
|
| 32 |
-
import cv2
|
| 33 |
-
self.detector = cv2.dnn.readNet(weights_path, config_path)
|
| 34 |
-
except Exception as e:
|
| 35 |
-
logger.error(f"Error setting up detector: {e}")
|
| 36 |
-
self.detector = None
|
| 37 |
-
|
| 38 |
-
def _ensure_nlp_loaded(self):
|
| 39 |
-
"""Ensure NLP model is loaded before use."""
|
| 40 |
-
if self.nlp is None:
|
| 41 |
-
try:
|
| 42 |
-
logger.info("Loading NLP model...")
|
| 43 |
-
# Import transformers only when needed
|
| 44 |
-
from transformers import pipeline
|
| 45 |
-
self.nlp = pipeline("text-classification", model="roberta-base")
|
| 46 |
-
logger.info("NLP model loaded successfully")
|
| 47 |
-
except Exception as e:
|
| 48 |
-
logger.error(f"Error loading NLP model: {e}")
|
| 49 |
-
raise
|
| 50 |
-
|
| 51 |
-
def process_ad(self, ad):
|
| 52 |
-
if not ad:
|
| 53 |
-
raise ValueError("Ad content cannot be empty")
|
| 54 |
-
|
| 55 |
try:
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
-
logger.error(f"Error
|
| 64 |
-
|
| 65 |
|
| 66 |
-
def
|
| 67 |
-
|
| 68 |
-
return None
|
| 69 |
try:
|
| 70 |
-
self.
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
except Exception as e:
|
| 73 |
-
logger.error(f"
|
| 74 |
-
return
|
| 75 |
|
| 76 |
-
def
|
| 77 |
-
|
| 78 |
-
return None
|
| 79 |
try:
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
except Exception as e:
|
| 83 |
-
logger.error(f"
|
| 84 |
-
return
|
| 85 |
|
| 86 |
-
def
|
| 87 |
-
|
| 88 |
-
return None
|
| 89 |
try:
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
except Exception as e:
|
| 96 |
-
logger.error(f"
|
| 97 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import logging
|
| 3 |
+
import json
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from textblob import TextBlob
|
| 6 |
+
import spacy
|
| 7 |
+
import re
|
| 8 |
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
|
|
|
| 14 |
|
| 15 |
class AIPipeline:
    """Text analysis for scraped ads: sentiment score, topics and named entities.

    ``__init__`` loads two models and stores them on the instance:
    ``self.nlp`` (spaCy ``en_core_web_sm``) and ``self.sentiment`` (a
    transformers sentiment-analysis pipeline). Every analysis method is
    best-effort: on failure it logs the error and returns a neutral/empty value.
    """

    # Entity labels that are reported as topics in addition to noun phrases.
    _TOPIC_ENTITY_LABELS = frozenset({'ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART'})
    # Cleaned candidates that are never accepted as a topic on their own.
    _TOPIC_STOP_WORDS = frozenset({'the', 'this', 'that', 'these', 'those'})
    # Maximum number of topics returned per text.
    _MAX_TOPICS = 5

    def __init__(self):
        """Initialize the AI pipeline with necessary models.

        Raises:
            Exception: re-raised (after logging) when either model fails to load.
        """
        try:
            # Load spaCy model for NER and topic extraction
            self.nlp = spacy.load('en_core_web_sm')

            # Initialize sentiment analyzer
            self.sentiment = pipeline(
                'sentiment-analysis',
                model='distilbert-base-uncased-finetuned-sst-2-english',
            )

            logger.info("AI Pipeline initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing AI Pipeline: {e}")
            raise

    @staticmethod
    def _empty_result() -> dict:
        """Return the neutral result used for missing content and for failures."""
        return {
            'sentiment': 0.0,
            'topics': [],
            'entities': []
        }

    def _analyze_sentiment(self, text: str) -> float:
        """Analyze sentiment of text and return a score between -1 and 1.

        The signed confidence of the transformers classifier is averaged with
        TextBlob's polarity. Returns 0.0 when the analysis fails.
        """
        try:
            # truncation=True clips over-long text to the model's maximum input
            # length; without it long ad copy raises inside the model and the
            # ad would be silently scored as neutral by the handler below.
            result = self.sentiment(text, truncation=True)[0]

            # Convert POSITIVE/NEGATIVE to a signed float
            if result['label'] == 'POSITIVE':
                score = result['score']
            else:
                score = -result['score']

            # Use TextBlob for additional nuance, then average the two scores
            blob_score = TextBlob(text).sentiment.polarity
            return (score + blob_score) / 2
        except Exception as e:
            logger.error(f"Error in sentiment analysis: {e}")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Extract main topics from text.

        Candidates are the lower-cased noun chunks plus entities whose label is
        in ``_TOPIC_ENTITY_LABELS``. Each candidate is stripped of punctuation
        and surplus whitespace; candidates of three characters or fewer and
        stop words are dropped.

        Returns:
            At most ``_MAX_TOPICS`` distinct topics, most frequent first and
            alphabetical among equally frequent ones. Empty list on failure.
        """
        try:
            doc = self.nlp(text)

            candidates = [chunk.text.lower() for chunk in doc.noun_chunks]
            candidates += [
                ent.text.lower()
                for ent in doc.ents
                if ent.label_ in self._TOPIC_ENTITY_LABELS
            ]

            # Count occurrences of each cleaned topic so that "top" means
            # "mentioned most often" rather than "alphabetically first".
            counts = {}
            for candidate in candidates:
                # Remove special characters and extra whitespace
                topic = re.sub(r'[^\w\s]', '', candidate)
                topic = ' '.join(topic.split())

                # Filter out short or common words
                if len(topic) > 3 and topic not in self._TOPIC_STOP_WORDS:
                    counts[topic] = counts.get(topic, 0) + 1

            # Alphabetical tie-break keeps the output deterministic.
            ranked = sorted(counts, key=lambda topic: (-counts[topic], topic))
            return ranked[:self._MAX_TOPICS]
        except Exception as e:
            logger.error(f"Error in topic extraction: {e}")
            return []

    def _extract_entities(self, text: str) -> list:
        """Extract named entities from text.

        Returns:
            One dict per entity with keys ``text``, ``type`` (the spaCy label)
            and ``description`` (``spacy.explain`` of the label). Empty list on
            failure.
        """
        try:
            doc = self.nlp(text)
            return [
                {
                    'text': ent.text,
                    'type': ent.label_,
                    'description': spacy.explain(ent.label_)
                }
                for ent in doc.ents
            ]
        except Exception as e:
            logger.error(f"Error in entity extraction: {e}")
            return []

    def process_ad(self, ad) -> dict:
        """Process an ad and return analysis results.

        Args:
            ad: any object; its ``content`` attribute, when present, is the
                text that gets analysed.

        Returns:
            Dict with keys ``sentiment`` (float), ``topics`` (list of str) and
            ``entities`` (list of dict). The neutral/empty result is returned
            when the ad has no content, only whitespace, or processing fails.
        """
        try:
            # Ensure we have content to analyze; whitespace-only text carries
            # nothing to analyse either.
            content = getattr(ad, 'content', None)
            if not content or not content.strip():
                return self._empty_result()

            return {
                'sentiment': self._analyze_sentiment(content),
                'topics': self._extract_topics(content),
                'entities': self._extract_entities(content)
            }
        except Exception as e:
            logger.error(f"Error in ad processing: {e}")
            return self._empty_result()
|
app/services/facebook_scraper.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from selenium import webdriver
|
| 2 |
+
from selenium.webdriver.common.by import By
|
| 3 |
+
from selenium.webdriver.chrome.service import Service
|
| 4 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 5 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 6 |
+
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
|
| 7 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 8 |
+
import time
|
| 9 |
+
import json
|
| 10 |
+
import logging
|
| 11 |
+
import re
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from contextlib import contextmanager
|
| 14 |
+
from typing import List, Dict, Any, Optional
|
| 15 |
+
from bs4 import BeautifulSoup
|
| 16 |
+
from urllib.parse import urlparse, parse_qs
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
class FacebookScraper:
    """
    Selenium/Chrome scraper for the Facebook Ads Library.

    Each public ``scrape_*`` method opens its own browser session through
    ``_get_driver`` and the session is quit again before the method returns.
    """

    # Ads Library entry point; query parameters are appended per request.
    ADS_LIBRARY_URL = "https://www.facebook.com/ads/library/"

    # CSS selectors for a single ad card, tried in order because Facebook
    # might change their structure.
    AD_SELECTORS = (
        "div.x1yztbdb",  # Current selector
        "div[role='article']",  # Alternative selector
        "div.x1iorvi4",  # Another possible selector
    )

    def __init__(self, headless: bool = True, timeout: int = 10, use_proxy: bool = False, proxy: Optional[str] = None):
        """
        Initialize the Facebook scraper with configurable options.

        Args:
            headless: Whether to run the browser in headless mode
            timeout: Default timeout for waiting operations in seconds
            use_proxy: Whether to use a proxy
            proxy: Proxy server address (e.g., "http://user:pass@ip:port")
        """
        self.driver = None
        self.headless = headless
        self.timeout = timeout
        self.use_proxy = use_proxy
        self.proxy = proxy

    def _setup_driver(self):
        """Configure and initialize the Chrome WebDriver with optimal settings."""
        options = webdriver.ChromeOptions()

        if self.headless:
            options.add_argument("--headless")

        # Add common options for stability
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-gpu")
        options.add_argument("--window-size=1920,1080")

        # Add user agent to appear more like a regular browser
        options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36")

        # Add proxy if specified
        if self.use_proxy and self.proxy:
            options.add_argument(f'--proxy-server={self.proxy}')

        # Disable automation flags to avoid detection
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)

        return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    @contextmanager
    def _get_driver(self):
        """
        Context manager for a browser session that always quits its own driver.

        The driver is kept in a local variable so that a nested session cannot
        make this one quit the wrong browser; ``self.driver`` mirrors the most
        recently opened session and is cleared when that session ends.

        Raises:
            Exception: re-raised (after logging) when the driver cannot be
                created. Errors raised inside the ``with`` body propagate
                unchanged.
        """
        try:
            driver = self._setup_driver()
        except Exception as e:
            logger.error(f"Error initializing WebDriver: {e}")
            raise

        self.driver = driver
        try:
            yield driver
        finally:
            # Never leave a reference to a browser that has been shut down.
            if self.driver is driver:
                self.driver = None
            driver.quit()

    def _wait_for_element(self, driver, selector: str, by: str = By.CSS_SELECTOR, timeout: Optional[int] = None) -> Any:
        """
        Wait for an element to be present and return it.

        Args:
            driver: WebDriver instance
            selector: Element selector
            by: Selector type (CSS, XPATH, etc.)
            timeout: Wait timeout in seconds; defaults to ``self.timeout``

        Returns:
            The found web element
        """
        if timeout is None:
            timeout = self.timeout

        wait = WebDriverWait(driver, timeout)
        return wait.until(EC.presence_of_element_located((by, selector)))

    def _wait_for_elements(self, driver, selector: str, by: str = By.CSS_SELECTOR, timeout: Optional[int] = None) -> List[Any]:
        """
        Wait for elements to be present and return them.

        Args:
            driver: WebDriver instance
            selector: Elements selector
            by: Selector type (CSS, XPATH, etc.)
            timeout: Wait timeout in seconds; defaults to ``self.timeout``

        Returns:
            List of found web elements
        """
        if timeout is None:
            timeout = self.timeout

        wait = WebDriverWait(driver, timeout)
        return wait.until(EC.presence_of_all_elements_located((by, selector)))

    def _scroll_to_load_more(self, driver, scroll_count: int = 5, scroll_pause: float = 2.0):
        """
        Scroll down the page to load more content.

        Args:
            driver: WebDriver instance
            scroll_count: Number of times to scroll
            scroll_pause: Pause between scrolls in seconds
        """
        for i in range(scroll_count):
            # Scroll down to bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Wait to load page
            time.sleep(scroll_pause)

            # Log progress
            logger.debug(f"Completed scroll {i+1}/{scroll_count}")

    def _library_url(self, country_code: str = "ALL", **params: Any) -> str:
        """
        Build an Ads Library URL with a properly encoded query string.

        Args:
            country_code: Value of the ``country`` filter
            **params: Extra query parameters, appended in the order given

        Returns:
            The complete URL
        """
        # Local import: urlencode is not among this module's top-level imports.
        from urllib.parse import urlencode

        query = {"active_status": "all", "ad_type": "all", "country": country_code}
        query.update(params)
        # urlencode escapes spaces, '&', '#', ... so a search term can neither
        # break the URL nor inject extra parameters.
        return f"{self.ADS_LIBRARY_URL}?{urlencode(query)}"

    def _open_library_page(self, driver, url: str, awaited: str):
        """
        Navigate to ``url`` and wait for the main content container.

        A timeout is only logged: scraping continues with whatever has loaded.

        Args:
            driver: WebDriver instance
            url: Address to open
            awaited: What is being waited for, used in the timeout warning
        """
        driver.get(url)
        try:
            self._wait_for_element(driver, "div[role='main']")
        except TimeoutException:
            logger.warning(f"Timeout waiting for {awaited}")

    def _find_ad_elements(self, driver) -> List[Any]:
        """Return the ad elements matched by the first selector in ``AD_SELECTORS`` that finds any."""
        for selector in self.AD_SELECTORS:
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception as e:
                logger.debug(f"Selector {selector} failed: {e}")
                continue
            if elements:
                logger.info(f"Found {len(elements)} ads using selector: {selector}")
                return elements
        return []

    def _extract_ads(self, ad_elements, extra_fields: Optional[Dict[str, Any]] = None,
                     number_ads: bool = True, label: str = "ad") -> List[Dict[str, Any]]:
        """
        Extract details from every ad element, skipping elements that fail.

        Args:
            ad_elements: WebElements, one per ad
            extra_fields: Key/values added to every extracted ad
            number_ads: Whether to store the 1-based ``position`` of each ad
            label: Noun used in the error log for a failed element

        Returns:
            List of ad dictionaries, in page order
        """
        ads = []
        for position, ad_element in enumerate(ad_elements, start=1):
            try:
                ad_data = self._extract_ad_details(ad_element)
            except Exception as e:
                logger.error(f"Error processing {label} {position}: {e}")
                continue
            if number_ads:
                ad_data["position"] = position
            if extra_fields:
                ad_data.update(extra_fields)
            ads.append(ad_data)
        return ads

    def _find_page_id(self, driver) -> Optional[str]:
        """Return the page ID from the first ``view_all_page_id`` link on the current page, or None."""
        page_links = driver.find_elements(By.CSS_SELECTOR, "a[href*='view_all_page_id=']")
        if not page_links:
            return None
        href = page_links[0].get_attribute("href") or ""
        page_id_match = re.search(r'view_all_page_id=(\d+)', href)
        return page_id_match.group(1) if page_id_match else None

    def _extract_ad_details(self, ad_element) -> Dict[str, Any]:
        """
        Extract detailed information from an ad element.

        Extraction is best-effort: a failure while reading the sub-elements is
        logged and whatever was collected up to that point is returned.

        Args:
            ad_element: WebElement containing the ad

        Returns:
            Dictionary with ad details. Always has ``scrape_time``, ``platform``
            and ``raw_text``; ``advertiser``, ``content``, ``images``, ``links``
            and ``ad_id`` are present only when found.
        """
        ad_data = {
            "scrape_time": datetime.now().isoformat(),
            "platform": "facebook",
            "raw_text": ad_element.text
        }

        try:
            # Try to extract advertiser name
            advertiser_elem = ad_element.find_elements(By.CSS_SELECTOR, "span[dir='auto']")
            if advertiser_elem:
                ad_data["advertiser"] = advertiser_elem[0].text

            # Try to extract ad content
            content_elem = ad_element.find_elements(By.CSS_SELECTOR, "div[dir='auto']")
            if content_elem:
                ad_data["content"] = "\n".join([elem.text for elem in content_elem])

            # Try to extract images; each attribute is read once per element
            img_elems = ad_element.find_elements(By.TAG_NAME, "img")
            if img_elems:
                sources = (img.get_attribute("src") for img in img_elems)
                ad_data["images"] = [src for src in sources if src]

            # Try to extract links
            link_elems = ad_element.find_elements(By.TAG_NAME, "a")
            if link_elems:
                hrefs = (link.get_attribute("href") for link in link_elems)
                ad_data["links"] = [href for href in hrefs if href]

            # Try to extract ad ID from the first link that carries one
            for link in ad_data.get("links", []):
                id_match = re.search(r'id=(\d+)', link)
                if id_match:
                    ad_data["ad_id"] = id_match.group(1)
                    break

        except Exception as e:
            logger.warning(f"Error extracting ad details: {e}")

        return ad_data

    def scrape_ads(self, search_query: str, num_scrolls: int = 5, country_code: str = "ALL") -> List[Dict[str, Any]]:
        """
        Scrape ads from Facebook Ads Library based on a search query.

        Args:
            search_query: Keyword to search for
            num_scrolls: Number of times to scroll to load more ads
            country_code: Country code filter (e.g., "US", "GB", "ALL")

        Returns:
            List of dictionaries containing ad information; empty when nothing
            was found or a WebDriver error occurred while scraping.
        """
        with self._get_driver() as driver:
            try:
                url = self._library_url(country_code, q=search_query, search_type="keyword")
                logger.info(f"Accessing Facebook Ads Library: {url}")

                self._open_library_page(driver, url, "main content to load")

                # Scroll to load more ads
                self._scroll_to_load_more(driver, num_scrolls)

                ad_elements = self._find_ad_elements(driver)
                if not ad_elements:
                    logger.warning("No ad elements found with any selector")
                    return []

                ads_data = self._extract_ads(ad_elements, {"search_query": search_query})
                logger.info(f"Successfully scraped {len(ads_data)} ads")
                return ads_data

            except (TimeoutException, WebDriverException) as e:
                logger.error(f"Error during scraping: {e}")
                return []

    def scrape_advertiser_details(self, advertiser_id: str) -> Dict[str, Any]:
        """
        Scrape details about a specific advertiser.

        Args:
            advertiser_id: Facebook ID of the advertiser

        Returns:
            Dictionary with advertiser information (``id``, ``scrape_time``,
            ``sample_ads``, ``sample_ad_count`` and, when found, ``name`` and
            ``ad_count``). On failure: ``{"id": ..., "error": ...}``.
        """
        with self._get_driver() as driver:
            try:
                url = self._library_url(view_all_page_id=advertiser_id)
                logger.info(f"Accessing advertiser page: {url}")

                self._open_library_page(driver, url, "advertiser page to load")

                advertiser_data = {
                    "id": advertiser_id,
                    "scrape_time": datetime.now().isoformat()
                }

                # Name and ad count are optional: a missing element is skipped,
                # but only WebDriver errors are tolerated here.
                try:
                    name_elem = self._wait_for_element(driver, "div[role='main'] h1", timeout=5)
                    advertiser_data["name"] = name_elem.text
                except WebDriverException as e:
                    logger.debug(f"Advertiser name not available: {e}")

                try:
                    count_text = driver.find_element(By.XPATH, "//div[contains(text(), 'ads')]").text
                    # Accept thousands separators so "1,234 ads" is read as 1234.
                    count_match = re.search(r'(\d[\d,]*)\s+ads', count_text)
                    if count_match:
                        advertiser_data["ad_count"] = int(count_match.group(1).replace(",", ""))
                except WebDriverException as e:
                    logger.debug(f"Advertiser ad count not available: {e}")

                # Scroll to load some ads
                self._scroll_to_load_more(driver, 3)

                # Get up to 5 sample ads
                sample_ads = self._extract_ads(
                    self._find_ad_elements(driver)[:5],
                    number_ads=False,
                    label="sample ad",
                )

                advertiser_data["sample_ads"] = sample_ads
                advertiser_data["sample_ad_count"] = len(sample_ads)

                return advertiser_data

            except Exception as e:
                logger.error(f"Error scraping advertiser details: {e}")
                return {"id": advertiser_id, "error": str(e)}

    def scrape_ads_by_topic(self, topic: str, num_scrolls: int = 5, country_code: str = "ALL") -> List[Dict[str, Any]]:
        """
        Scrape ads related to a specific topic.

        Args:
            topic: Topic to search for (e.g., "politics", "health", "finance")
            num_scrolls: Number of times to scroll to load more ads
            country_code: Country code filter

        Returns:
            List of dictionaries containing ad information
        """
        # This is essentially the same as scrape_ads but with a different name for clarity
        return self.scrape_ads(topic, num_scrolls, country_code)

    def scrape_ads_by_page(self, page_name: str, num_scrolls: int = 5) -> List[Dict[str, Any]]:
        """
        Scrape ads from a specific Facebook page.

        The page is looked up through a page search; when no page ID can be
        determined (or scraping the page fails) a keyword search for
        ``page_name`` is used instead.

        Args:
            page_name: Name of the Facebook page
            num_scrolls: Number of times to scroll to load more ads

        Returns:
            List of dictionaries containing ad information
        """
        ads_data = None

        with self._get_driver() as driver:
            try:
                search_url = self._library_url(q=page_name, search_type="page")
                logger.info(f"Searching for page: {search_url}")
                self._open_library_page(driver, search_url, "page search results")
            except Exception as e:
                logger.error(f"Error during page scraping: {e}")
                return []

            try:
                page_id = self._find_page_id(driver)
                if page_id is not None:
                    logger.info(f"Found page ID: {page_id}")

                    # Navigate directly to page's ads
                    page_url = self._library_url(view_all_page_id=page_id)
                    self._open_library_page(driver, page_url, "page ads to load")

                    # Scroll to load more ads
                    self._scroll_to_load_more(driver, num_scrolls)

                    ads_data = self._extract_ads(
                        self._find_ad_elements(driver),
                        {"page_name": page_name, "page_id": page_id},
                    )
                    logger.info(f"Successfully scraped {len(ads_data)} ads from page {page_name}")
            except Exception as e:
                logger.error(f"Error finding page: {e}")

        if ads_data is not None:
            return ads_data

        # The fallback runs only after the session above has been closed, so
        # two browsers are never open at the same time.
        logger.warning(f"Could not find page {page_name}, falling back to keyword search")
        try:
            return self.scrape_ads(page_name, num_scrolls)
        except Exception as e:
            logger.error(f"Error during page scraping: {e}")
            return []
|
app/templates/base.html
CHANGED
|
@@ -3,11 +3,12 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>{% block title %}Facebook Ad Analytics{% endblock %}</title>
|
| 7 |
<!-- Bootstrap CSS -->
|
| 8 |
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 9 |
<!-- Custom CSS -->
|
| 10 |
<link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
|
|
|
|
| 11 |
{% block head_extra %}{% endblock %}
|
| 12 |
</head>
|
| 13 |
<body>
|
|
@@ -26,7 +27,14 @@
|
|
| 26 |
<a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
|
| 27 |
</li>
|
| 28 |
<li class="nav-item">
|
| 29 |
-
<a class="nav-link" href="{{ url_for('
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
</li>
|
| 31 |
</ul>
|
| 32 |
<ul class="navbar-nav">
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>{% block title %}Facebook Ad Analytics{% endblock %} - Ad Analysis Tool</title>
|
| 7 |
<!-- Bootstrap CSS -->
|
| 8 |
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 9 |
<!-- Custom CSS -->
|
| 10 |
<link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
|
| 11 |
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
|
| 12 |
{% block head_extra %}{% endblock %}
|
| 13 |
</head>
|
| 14 |
<body>
|
|
|
|
| 27 |
<a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
|
| 28 |
</li>
|
| 29 |
<li class="nav-item">
|
| 30 |
+
<a class="nav-link" href="{{ url_for('facebook_ads.index') }}">
|
| 31 |
+
<i class="fab fa-facebook"></i> Facebook Ads
|
| 32 |
+
</a>
|
| 33 |
+
</li>
|
| 34 |
+
<li class="nav-item">
|
| 35 |
+
<a class="nav-link" href="{{ url_for('compliance.compliance_report') }}">
|
| 36 |
+
<i class="fas fa-check-circle"></i> Compliance
|
| 37 |
+
</a>
|
| 38 |
</li>
|
| 39 |
</ul>
|
| 40 |
<ul class="navbar-nav">
|
app/templates/facebook_ads/ad_analysis.html
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
{# Analysis page for a single ad: shows ad.content, ad.sentiment, ad.topics and ad.entities. #}

{% block title %}Ad Analysis - {{ ad.advertiser }}{% endblock %}

{% block content %}
<div class="container mt-4">
    <nav aria-label="breadcrumb">
        <ol class="breadcrumb">
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.results') }}">Results</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}">Ad Details</a></li>
            <li class="breadcrumb-item active" aria-current="page">Analysis</li>
        </ol>
    </nav>

    <div class="card mb-4">
        <div class="card-header">
            <h2 class="mb-0">Ad Analysis</h2>
        </div>
        <div class="card-body">
            <div class="row mb-4">
                <div class="col-md-12">
                    <h4>Original Content</h4>
                    <p class="lead">{{ ad.content }}</p>
                </div>
            </div>

            <div class="row">
                <div class="col-md-6">
                    <div class="card mb-4">
                        <div class="card-body">
                            <h4>Sentiment Analysis</h4>
                            {% if ad.sentiment is not none %}
                            <div class="progress mb-3">
                                {# Map the score from [-1, 1] onto a 0-100 % bar width. #}
                                {% set sentiment_percent = ((ad.sentiment + 1) / 2) * 100 %}
                                <div class="progress-bar bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}"
                                     role="progressbar"
                                     style="width: {{ sentiment_percent }}%"
                                     aria-valuenow="{{ sentiment_percent }}"
                                     aria-valuemin="0"
                                     aria-valuemax="100">
                                    {{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
                                </div>
                            </div>
                            <p>
                                <strong>Score:</strong> {{ "%.2f"|format(ad.sentiment) }}<br>
                                <small class="text-muted">
                                    Scores range from -1 (very negative) to +1 (very positive)
                                </small>
                            </p>
                            {% else %}
                            <div class="alert alert-info">
                                Sentiment analysis is in progress...
                            </div>
                            {% endif %}
                        </div>
                    </div>
                </div>

                <div class="col-md-6">
                    <div class="card mb-4">
                        <div class="card-body">
                            <h4>Topics</h4>
                            {% if ad.topics %}
                            <div class="mb-3">
                                {% for topic in ad.topics %}
                                <span class="badge bg-info me-2 mb-2">{{ topic }}</span>
                                {% endfor %}
                            </div>
                            <small class="text-muted">
                                Topics are extracted using natural language processing
                            </small>
                            {% else %}
                            <div class="alert alert-info">
                                Topic analysis is in progress...
                            </div>
                            {% endif %}
                        </div>
                    </div>
                </div>
            </div>

            {% if ad.entities %}
            <div class="card mb-4">
                <div class="card-body">
                    <h4>Named Entities</h4>
                    <div class="table-responsive">
                        <table class="table table-striped">
                            <thead>
                                <tr>
                                    <th>Entity</th>
                                    <th>Type</th>
                                    <th>Description</th>
                                </tr>
                            </thead>
                            <tbody>
                                {% for entity in ad.entities %}
                                <tr>
                                    <td>{{ entity.text }}</td>
                                    <td><span class="badge bg-secondary">{{ entity.type }}</span></td>
                                    <td>{{ entity.description or 'N/A' }}</td>
                                </tr>
                                {% endfor %}
                            </tbody>
                        </table>
                    </div>
                    <small class="text-muted">
                        Named entities are important words or phrases that represent specific concepts
                    </small>
                </div>
            </div>
            {% endif %}

            {# "is none", not a truthiness test: a neutral score of 0.0 is a finished result, not a missing one. #}
            {% if ad.sentiment is none or not ad.topics %}
            <div class="alert alert-warning">
                <h4 class="alert-heading">Analysis in Progress</h4>
                <p>The ad content is being analyzed. This process may take a few moments. Please refresh the page to see updated results.</p>
            </div>
            {% endif %}
        </div>
    </div>
</div>
{% endblock %}
|
app/templates/facebook_ads/ad_detail.html
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Facebook Ad Details{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<nav aria-label="breadcrumb">
|
| 8 |
+
<ol class="breadcrumb">
|
| 9 |
+
<li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
|
| 10 |
+
<li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.results') }}">Results</a></li>
|
| 11 |
+
<li class="breadcrumb-item active" aria-current="page">Ad Details</li>
|
| 12 |
+
</ol>
|
| 13 |
+
</nav>
|
| 14 |
+
|
| 15 |
+
<div class="card mb-4">
|
| 16 |
+
<div class="card-header">
|
| 17 |
+
<h2 class="mb-0">
|
| 18 |
+
<a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=ad.advertiser) }}">
|
| 19 |
+
{{ ad.advertiser }}
|
| 20 |
+
</a>
|
| 21 |
+
</h2>
|
| 22 |
+
</div>
|
| 23 |
+
<div class="card-body">
|
| 24 |
+
{% if ad.image_urls %}
|
| 25 |
+
<div class="row mb-4">
|
| 26 |
+
{% for image_url in ad.image_urls %}
|
| 27 |
+
<div class="col-md-6 mb-3">
|
| 28 |
+
<img src="{{ image_url }}" class="img-fluid rounded" alt="Ad Image {{ loop.index }}">
|
| 29 |
+
</div>
|
| 30 |
+
{% endfor %}
|
| 31 |
+
</div>
|
| 32 |
+
{% endif %}
|
| 33 |
+
|
| 34 |
+
<div class="mb-4">
|
| 35 |
+
<h4>Ad Content</h4>
|
| 36 |
+
<p class="lead">{{ ad.content }}</p>
|
| 37 |
+
</div>
|
| 38 |
+
|
| 39 |
+
{% if ad.links %}
|
| 40 |
+
<div class="mb-4">
|
| 41 |
+
<h4>Links</h4>
|
| 42 |
+
<ul class="list-group">
|
| 43 |
+
{% for link in ad.links %}
|
| 44 |
+
<li class="list-group-item">
|
| 45 |
+
<a href="{{ link }}" target="_blank" rel="noopener noreferrer">{{ link }}</a>
|
| 46 |
+
</li>
|
| 47 |
+
{% endfor %}
|
| 48 |
+
</ul>
|
| 49 |
+
</div>
|
| 50 |
+
{% endif %}
|
| 51 |
+
|
| 52 |
+
<div class="row">
|
| 53 |
+
<div class="col-md-6">
|
| 54 |
+
{% if ad.sentiment is not none %}
|
| 55 |
+
<div class="mb-4">
|
| 56 |
+
<h4>Sentiment Analysis</h4>
|
| 57 |
+
<div class="progress">
|
| 58 |
+
{% set sentiment_percent = ((ad.sentiment + 1) / 2) * 100 %}
|
| 59 |
+
<div class="progress-bar bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}"
|
| 60 |
+
role="progressbar"
|
| 61 |
+
style="width: {{ sentiment_percent }}%"
|
| 62 |
+
aria-valuenow="{{ sentiment_percent }}"
|
| 63 |
+
aria-valuemin="0"
|
| 64 |
+
aria-valuemax="100">
|
| 65 |
+
{{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
|
| 66 |
+
</div>
|
| 67 |
+
</div>
|
| 68 |
+
<small class="text-muted">Score: {{ "%.2f"|format(ad.sentiment) }}</small>
|
| 69 |
+
</div>
|
| 70 |
+
{% endif %}
|
| 71 |
+
</div>
|
| 72 |
+
|
| 73 |
+
<div class="col-md-6">
|
| 74 |
+
{% if ad.topics %}
|
| 75 |
+
<div class="mb-4">
|
| 76 |
+
<h4>Topics</h4>
|
| 77 |
+
{% for topic in ad.topics %}
|
| 78 |
+
<span class="badge bg-info me-2 mb-2">{{ topic }}</span>
|
| 79 |
+
{% endfor %}
|
| 80 |
+
</div>
|
| 81 |
+
{% endif %}
|
| 82 |
+
</div>
|
| 83 |
+
</div>
|
| 84 |
+
|
| 85 |
+
{% if ad.entities %}
|
| 86 |
+
<div class="mb-4">
|
| 87 |
+
<h4>Entities</h4>
|
| 88 |
+
<div class="table-responsive">
|
| 89 |
+
<table class="table table-striped">
|
| 90 |
+
<thead>
|
| 91 |
+
<tr>
|
| 92 |
+
<th>Entity</th>
|
| 93 |
+
<th>Type</th>
|
| 94 |
+
</tr>
|
| 95 |
+
</thead>
|
| 96 |
+
<tbody>
|
| 97 |
+
{% for entity in ad.entities %}
|
| 98 |
+
<tr>
|
| 99 |
+
<td>{{ entity.text }}</td>
|
| 100 |
+
<td><span class="badge bg-secondary">{{ entity.type }}</span></td>
|
| 101 |
+
</tr>
|
| 102 |
+
{% endfor %}
|
| 103 |
+
</tbody>
|
| 104 |
+
</table>
|
| 105 |
+
</div>
|
| 106 |
+
</div>
|
| 107 |
+
{% endif %}
|
| 108 |
+
|
| 109 |
+
{# A neutral sentiment score of 0 is falsy, so test for none explicitly (as the Sentiment Analysis section of this template does); otherwise an analyzed neutral ad is still offered for analysis. #}
{% if ad.sentiment is none or not ad.topics %}
|
| 110 |
+
<div class="mt-4">
|
| 111 |
+
<a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-primary">
|
| 112 |
+
Analyze Ad Content
|
| 113 |
+
</a>
|
| 114 |
+
</div>
|
| 115 |
+
{% endif %}
|
| 116 |
+
</div>
|
| 117 |
+
<div class="card-footer text-muted">
|
| 118 |
+
<div class="row">
|
| 119 |
+
<div class="col-md-6">
|
| 120 |
+
Scraped: {{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') }}
|
| 121 |
+
</div>
|
| 122 |
+
<div class="col-md-6 text-end">
|
| 123 |
+
Search Query: {{ ad.search_query or 'N/A' }}
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
</div>
|
| 127 |
+
</div>
|
| 128 |
+
</div>
|
| 129 |
+
{% endblock %}
|
app/templates/facebook_ads/advertiser_detail.html
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}{{ advertiser }} - Facebook Ads{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<nav aria-label="breadcrumb">
|
| 8 |
+
<ol class="breadcrumb">
|
| 9 |
+
<li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
|
| 10 |
+
<li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.advertisers') }}">Advertisers</a></li>
|
| 11 |
+
<li class="breadcrumb-item active" aria-current="page">{{ advertiser }}</li>
|
| 12 |
+
</ol>
|
| 13 |
+
</nav>
|
| 14 |
+
|
| 15 |
+
<div class="row mb-4">
|
| 16 |
+
<div class="col">
|
| 17 |
+
<h1>{{ advertiser }}</h1>
|
| 18 |
+
</div>
|
| 19 |
+
<div class="col-auto">
|
| 20 |
+
{# Hand page_name to url_for so the advertiser name is URL-encoded in the query string (names containing &, # or + would otherwise corrupt it). #}
<a href="{{ url_for('facebook_ads.page_search', page_name=advertiser) }}"
|
| 21 |
+
class="btn btn-primary">
|
| 22 |
+
Scrape More Ads
|
| 23 |
+
</a>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
|
| 27 |
+
{% if ads %}
|
| 28 |
+
<div class="row">
|
| 29 |
+
<div class="col-md-4 mb-4">
|
| 30 |
+
<div class="card">
|
| 31 |
+
<div class="card-body">
|
| 32 |
+
<h5 class="card-title">Statistics</h5>
|
| 33 |
+
<ul class="list-unstyled">
|
| 34 |
+
<li>Total Ads: {{ ads|length }}</li>
|
| 35 |
+
<li>First Ad: {{ ads[-1].created_at.strftime('%Y-%m-%d') }}</li>
|
| 36 |
+
<li>Latest Ad: {{ ads[0].created_at.strftime('%Y-%m-%d') }}</li>
|
| 37 |
+
</ul>
|
| 38 |
+
</div>
|
| 39 |
+
</div>
|
| 40 |
+
</div>
|
| 41 |
+
|
| 42 |
+
<div class="col-md-8 mb-4">
|
| 43 |
+
<div class="card">
|
| 44 |
+
<div class="card-body">
|
| 45 |
+
<h5 class="card-title">Sentiment Overview</h5>
|
| 46 |
+
{% set positive = namespace(count=0) %}
|
| 47 |
+
{% set negative = namespace(count=0) %}
|
| 48 |
+
{% set neutral = namespace(count=0) %}
|
| 49 |
+
{% for ad in ads %}
|
| 50 |
+
{% if ad.sentiment is not none %}
|
| 51 |
+
{% if ad.sentiment > 0 %}
|
| 52 |
+
{% set positive.count = positive.count + 1 %}
|
| 53 |
+
{% elif ad.sentiment < 0 %}
|
| 54 |
+
{% set negative.count = negative.count + 1 %}
|
| 55 |
+
{% else %}
|
| 56 |
+
{% set neutral.count = neutral.count + 1 %}
|
| 57 |
+
{% endif %}
|
| 58 |
+
{% endif %}
|
| 59 |
+
{% endfor %}
|
| 60 |
+
|
| 61 |
+
<div class="progress">
|
| 62 |
+
{% set total = positive.count + negative.count + neutral.count %}
|
| 63 |
+
{% if total > 0 %}
|
| 64 |
+
<div class="progress-bar bg-success" role="progressbar"
|
| 65 |
+
style="width: {{ (positive.count / total * 100)|round }}%">
|
| 66 |
+
{{ positive.count }}
|
| 67 |
+
</div>
|
| 68 |
+
<div class="progress-bar bg-secondary" role="progressbar"
|
| 69 |
+
style="width: {{ (neutral.count / total * 100)|round }}%">
|
| 70 |
+
{{ neutral.count }}
|
| 71 |
+
</div>
|
| 72 |
+
<div class="progress-bar bg-danger" role="progressbar"
|
| 73 |
+
style="width: {{ (negative.count / total * 100)|round }}%">
|
| 74 |
+
{{ negative.count }}
|
| 75 |
+
</div>
|
| 76 |
+
{% endif %}
|
| 77 |
+
</div>
|
| 78 |
+
<div class="mt-2">
|
| 79 |
+
<small class="text-muted">
|
| 80 |
+
Positive: {{ positive.count }},
|
| 81 |
+
Neutral: {{ neutral.count }},
|
| 82 |
+
Negative: {{ negative.count }}
|
| 83 |
+
</small>
|
| 84 |
+
</div>
|
| 85 |
+
</div>
|
| 86 |
+
</div>
|
| 87 |
+
</div>
|
| 88 |
+
</div>
|
| 89 |
+
|
| 90 |
+
<div class="row">
|
| 91 |
+
{% for ad in ads %}
|
| 92 |
+
<div class="col-md-6 mb-4">
|
| 93 |
+
<div class="card h-100">
|
| 94 |
+
<div class="card-body">
|
| 95 |
+
{% if ad.image_urls %}
|
| 96 |
+
<div class="mb-3">
|
| 97 |
+
<img src="{{ ad.image_urls[0] }}" class="img-fluid rounded" alt="Ad Image">
|
| 98 |
+
</div>
|
| 99 |
+
{% endif %}
|
| 100 |
+
|
| 101 |
+
<p class="card-text">{{ ad.content[:200] }}{% if ad.content|length > 200 %}...{% endif %}</p>
|
| 102 |
+
|
| 103 |
+
{# Test against none rather than truthiness: a score of 0 is falsy, which would hide the badge and make its "Neutral" branch unreachable. #}
{% if ad.sentiment is not none %}
|
| 104 |
+
<div class="mb-2">
|
| 105 |
+
<strong>Sentiment:</strong>
|
| 106 |
+
<span class="badge bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}">
|
| 107 |
+
{{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
|
| 108 |
+
</span>
|
| 109 |
+
</div>
|
| 110 |
+
{% endif %}
|
| 111 |
+
|
| 112 |
+
{% if ad.topics %}
|
| 113 |
+
<div class="mb-2">
|
| 114 |
+
<strong>Topics:</strong>
|
| 115 |
+
{% for topic in ad.topics %}
|
| 116 |
+
<span class="badge bg-info me-1">{{ topic }}</span>
|
| 117 |
+
{% endfor %}
|
| 118 |
+
</div>
|
| 119 |
+
{% endif %}
|
| 120 |
+
|
| 121 |
+
<div class="mt-3">
|
| 122 |
+
<a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}" class="btn btn-primary btn-sm">View Details</a>
|
| 123 |
+
{# Only offer analysis when a result is actually missing; a neutral sentiment score of 0 is falsy but is still a valid result. #}
{% if ad.sentiment is none or not ad.topics %}
|
| 124 |
+
<a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-secondary btn-sm">Analyze</a>
|
| 125 |
+
{% endif %}
|
| 126 |
+
</div>
|
| 127 |
+
</div>
|
| 128 |
+
<div class="card-footer text-muted">
|
| 129 |
+
{{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') }}
|
| 130 |
+
</div>
|
| 131 |
+
</div>
|
| 132 |
+
</div>
|
| 133 |
+
{% endfor %}
|
| 134 |
+
</div>
|
| 135 |
+
{% else %}
|
| 136 |
+
<div class="alert alert-info" role="alert">
|
| 137 |
+
{# page_name goes through url_for so the advertiser name is URL-encoded in the query string. #}
No ads found for this advertiser. Try <a href="{{ url_for('facebook_ads.page_search', page_name=advertiser) }}">scraping more ads</a>.
|
| 138 |
+
</div>
|
| 139 |
+
{% endif %}
|
| 140 |
+
</div>
|
| 141 |
+
{% endblock %}
|
app/templates/facebook_ads/advertisers.html
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Facebook Advertisers{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Facebook Advertisers</h1>
|
| 8 |
+
|
| 9 |
+
{% if advertisers %}
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-body">
|
| 12 |
+
<div class="table-responsive">
|
| 13 |
+
<table class="table table-striped table-hover">
|
| 14 |
+
<thead>
|
| 15 |
+
<tr>
|
| 16 |
+
<th>Advertiser</th>
|
| 17 |
+
<th>Number of Ads</th>
|
| 18 |
+
<th>Actions</th>
|
| 19 |
+
</tr>
|
| 20 |
+
</thead>
|
| 21 |
+
<tbody>
|
| 22 |
+
{% for advertiser in advertisers %}
|
| 23 |
+
<tr>
|
| 24 |
+
<td>{{ advertiser[0] }}</td>
|
| 25 |
+
<td>{{ advertiser[1] }}</td>
|
| 26 |
+
<td>
|
| 27 |
+
<a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=advertiser[0]) }}"
|
| 28 |
+
class="btn btn-primary btn-sm">View Ads</a>
|
| 29 |
+
</td>
|
| 30 |
+
</tr>
|
| 31 |
+
{% endfor %}
|
| 32 |
+
</tbody>
|
| 33 |
+
</table>
|
| 34 |
+
</div>
|
| 35 |
+
</div>
|
| 36 |
+
</div>
|
| 37 |
+
{% else %}
|
| 38 |
+
<div class="alert alert-info" role="alert">
|
| 39 |
+
No advertisers found. Try <a href="{{ url_for('facebook_ads.search') }}">searching for ads</a> first.
|
| 40 |
+
</div>
|
| 41 |
+
{% endif %}
|
| 42 |
+
</div>
|
| 43 |
+
{% endblock %}
|
app/templates/facebook_ads/index.html
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Facebook Ads Dashboard{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Facebook Ads Dashboard</h1>
|
| 8 |
+
|
| 9 |
+
<div class="row">
|
| 10 |
+
<div class="col-md-6">
|
| 11 |
+
<div class="card mb-4">
|
| 12 |
+
<div class="card-body">
|
| 13 |
+
<h5 class="card-title">Search Ads</h5>
|
| 14 |
+
<p class="card-text">Search for Facebook ads using keywords and filters.</p>
|
| 15 |
+
<a href="{{ url_for('facebook_ads.search') }}" class="btn btn-primary">Search Ads</a>
|
| 16 |
+
</div>
|
| 17 |
+
</div>
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<div class="col-md-6">
|
| 21 |
+
<div class="card mb-4">
|
| 22 |
+
<div class="card-body">
|
| 23 |
+
<h5 class="card-title">Page Search</h5>
|
| 24 |
+
<p class="card-text">Search for ads from specific Facebook pages.</p>
|
| 25 |
+
<a href="{{ url_for('facebook_ads.page_search') }}" class="btn btn-primary">Search by Page</a>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
|
| 31 |
+
<div class="row">
|
| 32 |
+
<div class="col-md-6">
|
| 33 |
+
<div class="card mb-4">
|
| 34 |
+
<div class="card-body">
|
| 35 |
+
<h5 class="card-title">View Results</h5>
|
| 36 |
+
<p class="card-text">Browse and analyze collected Facebook ads.</p>
|
| 37 |
+
<a href="{{ url_for('facebook_ads.results') }}" class="btn btn-primary">View Results</a>
|
| 38 |
+
</div>
|
| 39 |
+
</div>
|
| 40 |
+
</div>
|
| 41 |
+
|
| 42 |
+
<div class="col-md-6">
|
| 43 |
+
<div class="card mb-4">
|
| 44 |
+
<div class="card-body">
|
| 45 |
+
<h5 class="card-title">Advertisers</h5>
|
| 46 |
+
<p class="card-text">View and analyze advertisers and their ads.</p>
|
| 47 |
+
<a href="{{ url_for('facebook_ads.advertisers') }}" class="btn btn-primary">View Advertisers</a>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
+
</div>
|
| 51 |
+
</div>
|
| 52 |
+
</div>
|
| 53 |
+
{% endblock %}
|
app/templates/facebook_ads/page_search.html
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Search Facebook Page Ads{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Search Facebook Page Ads</h1>
|
| 8 |
+
|
| 9 |
+
<div class="card mb-4">
|
| 10 |
+
<div class="card-body">
|
| 11 |
+
<form method="POST" action="{{ url_for('facebook_ads.page_search') }}">
|
| 12 |
+
{{ form.csrf_token }}
|
| 13 |
+
<div class="mb-3">
|
| 14 |
+
<label for="page_name" class="form-label">Facebook Page Name</label>
|
| 15 |
+
<input type="text" class="form-control" id="page_name" name="page_name" required
|
| 16 |
+
placeholder="Enter the Facebook page name or URL">
|
| 17 |
+
<small class="text-muted">Example: cocacola or https://www.facebook.com/cocacola</small>
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<div class="mb-3">
|
| 21 |
+
<label for="num_scrolls" class="form-label">Number of Scrolls</label>
|
| 22 |
+
<input type="number" class="form-control" id="num_scrolls" name="num_scrolls"
|
| 23 |
+
value="5" min="1" max="50">
|
| 24 |
+
<small class="text-muted">More scrolls = more ads, but takes longer to scrape</small>
|
| 25 |
+
</div>
|
| 26 |
+
|
| 27 |
+
<button type="submit" class="btn btn-primary">Search Page Ads</button>
|
| 28 |
+
<a href="{{ url_for('facebook_ads.index') }}" class="btn btn-secondary">Back to Dashboard</a>
|
| 29 |
+
</form>
|
| 30 |
+
</div>
|
| 31 |
+
</div>
|
| 32 |
+
|
| 33 |
+
{% if task_id %}
|
| 34 |
+
<div class="alert alert-info" role="alert">
|
| 35 |
+
<h4 class="alert-heading">Scraping in Progress!</h4>
|
| 36 |
+
<p>Your page search request is being processed. This may take a few minutes depending on the number of scrolls.</p>
|
| 37 |
+
<hr>
|
| 38 |
+
<p class="mb-0">You can view the results on the <a href="{{ url_for('facebook_ads.results') }}">Results page</a> once the scraping is complete.</p>
|
| 39 |
+
</div>
|
| 40 |
+
{% endif %}
|
| 41 |
+
|
| 42 |
+
{% with messages = get_flashed_messages(with_categories=true) %}
|
| 43 |
+
{% if messages %}
|
| 44 |
+
{% for category, message in messages %}
|
| 45 |
+
<div class="alert alert-{{ category }}" role="alert">
|
| 46 |
+
{{ message }}
|
| 47 |
+
</div>
|
| 48 |
+
{% endfor %}
|
| 49 |
+
{% endif %}
|
| 50 |
+
{% endwith %}
|
| 51 |
+
</div>
|
| 52 |
+
{% endblock %}
|
app/templates/facebook_ads/results.html
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Facebook Ads Results{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Facebook Ads Results</h1>
|
| 8 |
+
|
| 9 |
+
<div class="card mb-4">
|
| 10 |
+
<div class="card-body">
|
| 11 |
+
<form method="GET" action="{{ url_for('facebook_ads.results') }}" class="row g-3">
|
| 12 |
+
<div class="col-md-4">
|
| 13 |
+
<label for="query" class="form-label">Search Query</label>
|
| 14 |
+
<input type="text" class="form-control" id="query" name="query" value="{{ query }}">
|
| 15 |
+
</div>
|
| 16 |
+
<div class="col-md-4">
|
| 17 |
+
<label for="advertiser" class="form-label">Advertiser</label>
|
| 18 |
+
<input type="text" class="form-control" id="advertiser" name="advertiser" value="{{ advertiser }}">
|
| 19 |
+
</div>
|
| 20 |
+
<div class="col-md-4">
|
| 21 |
+
<label class="form-label"> </label>
|
| 22 |
+
<div>
|
| 23 |
+
<button type="submit" class="btn btn-primary">Filter</button>
|
| 24 |
+
<a href="{{ url_for('facebook_ads.results') }}" class="btn btn-secondary">Clear Filters</a>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</form>
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
|
| 31 |
+
{% if ads %}
|
| 32 |
+
<div class="row">
|
| 33 |
+
{% for ad in ads %}
|
| 34 |
+
<div class="col-md-6 mb-4">
|
| 35 |
+
<div class="card h-100">
|
| 36 |
+
<div class="card-body">
|
| 37 |
+
<h5 class="card-title">
|
| 38 |
+
<a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=ad.advertiser) }}">
|
| 39 |
+
{{ ad.advertiser }}
|
| 40 |
+
</a>
|
| 41 |
+
</h5>
|
| 42 |
+
|
| 43 |
+
{% if ad.image_urls %}
|
| 44 |
+
<div class="mb-3">
|
| 45 |
+
<img src="{{ ad.image_urls[0] }}" class="img-fluid rounded" alt="Ad Image">
|
| 46 |
+
</div>
|
| 47 |
+
{% endif %}
|
| 48 |
+
|
| 49 |
+
<p class="card-text">{{ ad.content[:200] }}{% if ad.content|length > 200 %}...{% endif %}</p>
|
| 50 |
+
|
| 51 |
+
{# Test against none rather than truthiness: a score of 0 is falsy, which would hide the badge and make its "Neutral" branch unreachable. #}
{% if ad.sentiment is not none %}
|
| 52 |
+
<div class="mb-2">
|
| 53 |
+
<strong>Sentiment:</strong>
|
| 54 |
+
<span class="badge bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}">
|
| 55 |
+
{{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
|
| 56 |
+
</span>
|
| 57 |
+
</div>
|
| 58 |
+
{% endif %}
|
| 59 |
+
|
| 60 |
+
{% if ad.topics %}
|
| 61 |
+
<div class="mb-2">
|
| 62 |
+
<strong>Topics:</strong>
|
| 63 |
+
{% for topic in ad.topics %}
|
| 64 |
+
<span class="badge bg-info me-1">{{ topic }}</span>
|
| 65 |
+
{% endfor %}
|
| 66 |
+
</div>
|
| 67 |
+
{% endif %}
|
| 68 |
+
|
| 69 |
+
<div class="mt-3">
|
| 70 |
+
<a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}" class="btn btn-primary btn-sm">View Details</a>
|
| 71 |
+
<a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-secondary btn-sm">Analyze</a>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
<div class="card-footer text-muted">
|
| 75 |
+
Scraped: {{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') }}
|
| 76 |
+
</div>
|
| 77 |
+
</div>
|
| 78 |
+
</div>
|
| 79 |
+
{% endfor %}
|
| 80 |
+
</div>
|
| 81 |
+
{% else %}
|
| 82 |
+
<div class="alert alert-info" role="alert">
|
| 83 |
+
No ads found. Try adjusting your search filters or <a href="{{ url_for('facebook_ads.search') }}">search for new ads</a>.
|
| 84 |
+
</div>
|
| 85 |
+
{% endif %}
|
| 86 |
+
</div>
|
| 87 |
+
{% endblock %}
|
app/templates/facebook_ads/search.html
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Search Facebook Ads{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container mt-4">
|
| 7 |
+
<h1 class="mb-4">Search Facebook Ads</h1>
|
| 8 |
+
|
| 9 |
+
<div class="card mb-4">
|
| 10 |
+
<div class="card-body">
|
| 11 |
+
<form method="POST" action="{{ url_for('facebook_ads.search') }}">
|
| 12 |
+
{{ form.csrf_token }}
|
| 13 |
+
<div class="mb-3">
|
| 14 |
+
<label for="search_query" class="form-label">Search Query</label>
|
| 15 |
+
<input type="text" class="form-control" id="search_query" name="search_query" required
|
| 16 |
+
placeholder="Enter keywords to search for ads">
|
| 17 |
+
</div>
|
| 18 |
+
|
| 19 |
+
<div class="mb-3">
|
| 20 |
+
<label for="num_scrolls" class="form-label">Number of Scrolls</label>
|
| 21 |
+
<input type="number" class="form-control" id="num_scrolls" name="num_scrolls"
|
| 22 |
+
value="5" min="1" max="50">
|
| 23 |
+
<small class="text-muted">More scrolls = more ads, but takes longer to scrape</small>
|
| 24 |
+
</div>
|
| 25 |
+
|
| 26 |
+
<div class="mb-3">
|
| 27 |
+
<label for="country_code" class="form-label">Country</label>
|
| 28 |
+
<select class="form-control" id="country_code" name="country_code">
|
| 29 |
+
<option value="ALL">All Countries</option>
|
| 30 |
+
<option value="US">United States</option>
|
| 31 |
+
<option value="GB">United Kingdom</option>
|
| 32 |
+
<option value="CA">Canada</option>
|
| 33 |
+
<option value="AU">Australia</option>
|
| 34 |
+
<!-- Add more countries as needed -->
|
| 35 |
+
</select>
|
| 36 |
+
</div>
|
| 37 |
+
|
| 38 |
+
<button type="submit" class="btn btn-primary">Search Ads</button>
|
| 39 |
+
<a href="{{ url_for('facebook_ads.index') }}" class="btn btn-secondary">Back to Dashboard</a>
|
| 40 |
+
</form>
|
| 41 |
+
</div>
|
| 42 |
+
</div>
|
| 43 |
+
|
| 44 |
+
{% if task_id %}
|
| 45 |
+
<div class="alert alert-info" role="alert">
|
| 46 |
+
<h4 class="alert-heading">Scraping in Progress!</h4>
|
| 47 |
+
<p>Your search request is being processed. This may take a few minutes depending on the number of scrolls.</p>
|
| 48 |
+
<hr>
|
| 49 |
+
<p class="mb-0">You can view the results on the <a href="{{ url_for('facebook_ads.results') }}">Results page</a> once the scraping is complete.</p>
|
| 50 |
+
</div>
|
| 51 |
+
{% endif %}
|
| 52 |
+
|
| 53 |
+
{% with messages = get_flashed_messages(with_categories=true) %}
|
| 54 |
+
{% if messages %}
|
| 55 |
+
{% for category, message in messages %}
|
| 56 |
+
<div class="alert alert-{{ category }}" role="alert">
|
| 57 |
+
{{ message }}
|
| 58 |
+
</div>
|
| 59 |
+
{% endfor %}
|
| 60 |
+
{% endif %}
|
| 61 |
+
{% endwith %}
|
| 62 |
+
</div>
|
| 63 |
+
{% endblock %}
|
app/templates/login.html
CHANGED
|
@@ -1,15 +1,28 @@
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
{% block content %}
|
| 4 |
-
<
|
| 5 |
-
<
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
{% endblock %}
|
|
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
{% block content %}
|
| 4 |
+
<div class="container mt-5">
|
| 5 |
+
<div class="row justify-content-center">
|
| 6 |
+
<div class="col-md-6">
|
| 7 |
+
<div class="card">
|
| 8 |
+
<div class="card-header">Login</div>
|
| 9 |
+
<div class="card-body">
|
| 10 |
+
<form method="POST" action="{{ url_for('auth.login') }}">
|
| 11 |
+
{{ form.hidden_tag() }}
|
| 12 |
+
<div class="form-group">
|
| 13 |
+
{{ form.email.label }} {{ form.email(class="form-control") }}
|
| 14 |
+
</div>
|
| 15 |
+
<div class="form-group">
|
| 16 |
+
{{ form.password.label }} {{ form.password(class="form-control") }}
|
| 17 |
+
</div>
|
| 18 |
+
<button type="submit" class="btn btn-primary">Login</button>
|
| 19 |
+
</form>
|
| 20 |
+
<div class="mt-3">
|
| 21 |
+
<p>Don't have an account? <a href="{{ url_for('auth.register') }}">Register here</a>.</p>
|
| 22 |
+
</div>
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
{% endblock %}
|
celery.db
ADDED
|
Binary file (32.8 kB). View file
|
|
|
config.py
CHANGED
|
@@ -7,10 +7,13 @@ class Config:
|
|
| 7 |
if SECRET_KEY == 'dev-secret-key-change-in-production':
|
| 8 |
print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Use a mock API key for development if not provided
|
| 16 |
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
|
|
|
|
| 7 |
if SECRET_KEY == 'dev-secret-key-change-in-production':
|
| 8 |
print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")
|
| 9 |
|
| 10 |
+
# Use SQLite for simplicity
|
| 11 |
+
SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:///app.db')
|
| 12 |
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
| 13 |
+
|
| 14 |
+
# Use SQLite for Celery broker instead of Redis
|
| 15 |
+
CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'sqla+sqlite:///celery.db')
|
| 16 |
+
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'db+sqlite:///celery-results.db')
|
| 17 |
|
| 18 |
# Use a mock API key for development if not provided
|
| 19 |
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
|
migrations/README
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Single-database configuration for Flask.
|
migrations/__pycache__/env.cpython-312.pyc
ADDED
|
Binary file (4.5 kB). View file
|
|
|
migrations/alembic.ini
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# A generic, single database configuration.
|
| 2 |
+
|
| 3 |
+
[alembic]
|
| 4 |
+
# template used to generate migration files
|
| 5 |
+
# file_template = %%(rev)s_%%(slug)s
|
| 6 |
+
|
| 7 |
+
# set to 'true' to run the environment during
|
| 8 |
+
# the 'revision' command, regardless of autogenerate
|
| 9 |
+
# revision_environment = false
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Logging configuration
|
| 13 |
+
[loggers]
|
| 14 |
+
keys = root,sqlalchemy,alembic,flask_migrate
|
| 15 |
+
|
| 16 |
+
[handlers]
|
| 17 |
+
keys = console
|
| 18 |
+
|
| 19 |
+
[formatters]
|
| 20 |
+
keys = generic
|
| 21 |
+
|
| 22 |
+
[logger_root]
|
| 23 |
+
level = WARN
|
| 24 |
+
handlers = console
|
| 25 |
+
qualname =
|
| 26 |
+
|
| 27 |
+
[logger_sqlalchemy]
|
| 28 |
+
level = WARN
|
| 29 |
+
handlers =
|
| 30 |
+
qualname = sqlalchemy.engine
|
| 31 |
+
|
| 32 |
+
[logger_alembic]
|
| 33 |
+
level = INFO
|
| 34 |
+
handlers =
|
| 35 |
+
qualname = alembic
|
| 36 |
+
|
| 37 |
+
[logger_flask_migrate]
|
| 38 |
+
level = INFO
|
| 39 |
+
handlers =
|
| 40 |
+
qualname = flask_migrate
|
| 41 |
+
|
| 42 |
+
[handler_console]
|
| 43 |
+
class = StreamHandler
|
| 44 |
+
args = (sys.stderr,)
|
| 45 |
+
level = NOTSET
|
| 46 |
+
formatter = generic
|
| 47 |
+
|
| 48 |
+
[formatter_generic]
|
| 49 |
+
format = %(levelname)-5.5s [%(name)s] %(message)s
|
| 50 |
+
datefmt = %H:%M:%S
|
migrations/env.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from logging.config import fileConfig
|
| 3 |
+
|
| 4 |
+
from flask import current_app
|
| 5 |
+
|
| 6 |
+
from alembic import context
|
| 7 |
+
|
| 8 |
+
# this is the Alembic Config object, which provides
|
| 9 |
+
# access to the values within the .ini file in use.
|
| 10 |
+
config = context.config
|
| 11 |
+
|
| 12 |
+
# Interpret the config file for Python logging.
|
| 13 |
+
# This line sets up loggers basically.
|
| 14 |
+
fileConfig(config.config_file_name)
|
| 15 |
+
logger = logging.getLogger('alembic.env')
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_engine():
    """Return the database engine held by the 'migrate' Flask extension.

    ``db.get_engine()`` is tried first; if that raises ``TypeError`` or
    ``AttributeError`` the ``db.engine`` attribute is returned instead.
    """
    migrate_ext = current_app.extensions['migrate']
    try:
        # Flask-SQLAlchemy<3 and Alchemical provide the engine via a method
        engine = migrate_ext.db.get_engine()
    except (TypeError, AttributeError):
        # Flask-SQLAlchemy>=3 provides it as a plain attribute
        engine = migrate_ext.db.engine
    return engine
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_engine_url():
    """Return the engine URL as a string with every ``%`` doubled.

    ``render_as_string(hide_password=False)`` is preferred so the password
    stays in the URL; if that raises ``AttributeError`` the plain ``str()``
    form of the URL is used instead.
    """
    try:
        url_text = get_engine().url.render_as_string(
            hide_password=False).replace('%', '%%')
    except AttributeError:
        url_text = str(get_engine().url).replace('%', '%%')
    return url_text
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Point Alembic's 'sqlalchemy.url' option at the Flask app's engine URL
|
| 36 |
+
# and keep a handle on the database object registered with Flask-Migrate.
|
| 37 |
+
# get_metadata() below reads the metadata for 'autogenerate' support
|
| 38 |
+
# from that object, so no manual "target_metadata = ..." is set here.
|
| 39 |
+
config.set_main_option('sqlalchemy.url', get_engine_url())
|
| 40 |
+
target_db = current_app.extensions['migrate'].db
|
| 41 |
+
|
| 42 |
+
# Other values from the config, defined by the needs of env.py,
|
| 43 |
+
# can be read in the same way, for example:
|
| 44 |
+
# my_important_option = config.get_main_option("my_important_option")
|
| 45 |
+
# ... etc.
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_metadata():
    """Return the metadata object that migrations are compared against.

    When ``target_db`` exposes a ``metadatas`` mapping, the entry stored
    under the ``None`` key is returned; otherwise its ``metadata``
    attribute is returned.
    """
    if not hasattr(target_db, 'metadatas'):
        return target_db.metadata
    return target_db.metadatas[None]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured from the 'sqlalchemy.url' option alone, so
    no Engine is created and no DBAPI has to be available (an Engine would
    be acceptable here as well).

    Calls to context.execute() here emit the given string to the
    script output.

    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": get_metadata(),
        "literal_binds": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is obtained from the Flask app and a live connection is
    associated with the migration context.

    """

    def process_revision_directives(context, revision, directives):
        """Drop the generated revision when autogenerate found no changes.

        Reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
        """
        if not getattr(config.cmd_opts, 'autogenerate', False):
            return
        generated_script = directives[0]
        if not generated_script.upgrade_ops.is_empty():
            return
        # Emptying the list in place stops the revision file being written.
        directives[:] = []
        logger.info('No changes in schema detected.')

    configure_kwargs = current_app.extensions['migrate'].configure_args
    # Install the callback only if the app has not supplied its own.
    if configure_kwargs.get("process_revision_directives") is None:
        configure_kwargs["process_revision_directives"] = (
            process_revision_directives
        )

    engine = get_engine()

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            **configure_kwargs
        )

        with context.begin_transaction():
            context.run_migrations()
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# Run the migrations in whichever mode Alembic reports for this invocation.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
migrations/script.py.mako
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""${message}
|
| 2 |
+
|
| 3 |
+
Revision ID: ${up_revision}
|
| 4 |
+
Revises: ${down_revision | comma,n}
|
| 5 |
+
Create Date: ${create_date}
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from alembic import op
|
| 9 |
+
import sqlalchemy as sa
|
| 10 |
+
${imports if imports else ""}
|
| 11 |
+
|
| 12 |
+
# revision identifiers, used by Alembic.
|
| 13 |
+
revision = ${repr(up_revision)}
|
| 14 |
+
down_revision = ${repr(down_revision)}
|
| 15 |
+
branch_labels = ${repr(branch_labels)}
|
| 16 |
+
depends_on = ${repr(depends_on)}
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def upgrade():
|
| 20 |
+
${upgrades if upgrades else "pass"}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def downgrade():
|
| 24 |
+
${downgrades if downgrades else "pass"}
|
migrations/versions/__pycache__/dddcd665398d_add_facebook_ad_table.cpython-312.pyc
ADDED
|
Binary file (4.34 kB). View file
|
|
|
migrations/versions/dddcd665398d_add_facebook_ad_table.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Add facebook_ad table
|
| 2 |
+
|
| 3 |
+
Revision ID: dddcd665398d
|
| 4 |
+
Revises:
|
| 5 |
+
Create Date: 2025-03-10 09:02:26.975759
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from alembic import op
|
| 9 |
+
import sqlalchemy as sa
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Revision identifiers, used by Alembic. down_revision is None, matching the empty "Revises:" header above.
|
| 13 |
+
revision = 'dddcd665398d'
|
| 14 |
+
down_revision = None
|
| 15 |
+
branch_labels = None
|
| 16 |
+
depends_on = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def upgrade():
    """Create the ``facebook_ad`` table, then its four non-unique indexes."""
    table_columns = [
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('ad_id', sa.String(length=255), nullable=True),
        sa.Column('advertiser', sa.String(length=255), nullable=True),
        sa.Column('advertiser_id', sa.String(length=255), nullable=True),
        sa.Column('content', sa.Text(), nullable=True),
        sa.Column('images', sa.JSON(), nullable=True),
        sa.Column('links', sa.JSON(), nullable=True),
        sa.Column('search_query', sa.String(length=255), nullable=True),
        sa.Column('position', sa.Integer(), nullable=True),
        sa.Column('sentiment', sa.JSON(), nullable=True),
        sa.Column('topics', sa.JSON(), nullable=True),
        sa.Column('entities', sa.JSON(), nullable=True),
        sa.Column('raw_data', sa.JSON(), nullable=True),
        sa.Column('raw_text', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.Column('user_id', sa.Integer(), nullable=True),
    ]
    op.create_table(
        'facebook_ad',
        *table_columns,
        sa.ForeignKeyConstraint(['user_id'], ['user.id']),
        sa.PrimaryKeyConstraint('id'),
    )

    # Index creation order matches the original migration.
    indexed_columns = ('ad_id', 'advertiser', 'advertiser_id', 'search_query')
    with op.batch_alter_table('facebook_ad', schema=None) as batch_op:
        for column_name in indexed_columns:
            batch_op.create_index(
                batch_op.f(f'ix_facebook_ad_{column_name}'),
                [column_name],
                unique=False,
            )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def downgrade():
    """Drop the ``facebook_ad`` indexes, then the table itself."""
    # Indexes are dropped in the reverse of the order upgrade() creates them.
    indexed_columns = ('search_query', 'advertiser_id', 'advertiser', 'ad_id')
    with op.batch_alter_table('facebook_ad', schema=None) as batch_op:
        for column_name in indexed_columns:
            batch_op.drop_index(batch_op.f(f'ix_facebook_ad_{column_name}'))

    op.drop_table('facebook_ad')
|
requirements.txt
CHANGED
|
@@ -1,22 +1,21 @@
|
|
| 1 |
-
Flask==
|
| 2 |
-
Flask-SQLAlchemy==3.
|
| 3 |
-
Flask-Login==0.6.
|
| 4 |
-
Flask-
|
| 5 |
-
Flask-
|
| 6 |
-
|
| 7 |
-
celery==5.3.
|
| 8 |
-
redis==5.0.
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
torch==2.0.1
|
|
|
|
| 1 |
+
Flask==3.0.0
|
| 2 |
+
Flask-SQLAlchemy==3.1.1
|
| 3 |
+
Flask-Login==0.6.3
|
| 4 |
+
Flask-Migrate==4.0.5
|
| 5 |
+
Flask-WTF==1.2.1
|
| 6 |
+
email_validator==2.1.0.post1
|
| 7 |
+
celery==5.3.6
|
| 8 |
+
redis==5.0.1
|
| 9 |
+
alembic==1.13.1
|
| 10 |
+
python-dotenv==1.0.1
|
| 11 |
+
gunicorn==21.2.0
|
| 12 |
+
psycopg2-binary==2.9.9
|
| 13 |
+
requests==2.31.0
|
| 14 |
+
beautifulsoup4==4.12.3
|
| 15 |
+
selenium==4.17.2
|
| 16 |
+
transformers==4.37.2
|
| 17 |
+
torch==2.1.2
|
| 18 |
+
textblob==0.17.1
|
| 19 |
+
spacy==3.7.2
|
| 20 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
| 21 |
+
webdriver-manager==4.0.1
|
|
|