rastof9 committed on
Commit
f788a29
·
1 Parent(s): 179bfbc
.env CHANGED
@@ -1,9 +1,10 @@
1
  FLASK_APP=app.py
2
  FLASK_ENV=development
3
- SECRET_KEY=your-secret-key-here
4
- DATABASE_URL=postgresql://user:password@localhost:5432/facebook_ads
5
- CELERY_BROKER_URL=redis://localhost:6379/0
6
- CELERY_RESULT_BACKEND=redis://localhost:6379/0
7
- OPENAI_API_KEY=your-openai-api-key-here
 
8
  INSTANCE_PATH=/tmp/instance
9
  SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
 
1
  FLASK_APP=app.py
2
  FLASK_ENV=development
3
+ FLASK_DEBUG=1
4
+ SECRET_KEY=dev-secret-key-change-in-production
5
+ DATABASE_URL=sqlite:///app.db
6
+ CELERY_BROKER_URL=sqla+sqlite:///celery.db
7
+ CELERY_RESULT_BACKEND=db+sqlite:///celery-results.db
8
+ OPENAI_API_KEY=sk-mock-key-for-development
9
  INSTANCE_PATH=/tmp/instance
10
  SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
__pycache__/config.cpython-312.pyc CHANGED
Binary files a/__pycache__/config.cpython-312.pyc and b/__pycache__/config.cpython-312.pyc differ
 
app.py CHANGED
@@ -2,6 +2,11 @@ from flask import Flask
2
  from flask_migrate import Migrate
3
  from app import db, create_app
4
  from config import get_config
 
 
 
 
 
5
 
6
  migrate = Migrate()
7
 
@@ -10,6 +15,6 @@ def create_flask_app():
10
  migrate.init_app(app, db)
11
  return app
12
 
13
- if __name__ == "__main__":
14
  app = create_flask_app()
15
  app.run(debug=True)
 
2
  from flask_migrate import Migrate
3
  from app import db, create_app
4
  from config import get_config
5
+ from dotenv import load_dotenv
6
+ import os
7
+
8
+ # Load environment variables from .env file
9
+ load_dotenv()
10
 
11
  migrate = Migrate()
12
 
 
15
  migrate.init_app(app, db)
16
  return app
17
 
18
+ if __name__ == '__main__':
19
  app = create_flask_app()
20
  app.run(debug=True)
app/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
  from flask import Flask
2
  from flask_sqlalchemy import SQLAlchemy
 
3
  from flask_login import LoginManager
4
  from celery import Celery
 
5
  import redis
6
  import os
7
  import logging
@@ -18,9 +20,10 @@ logger = logging.getLogger(__name__)
18
 
19
  # Initialize extensions
20
  db = SQLAlchemy()
 
21
  login = LoginManager()
22
  login.login_view = 'auth.login'
23
- celery = Celery(__name__)
24
  cache = None # Initialize later when app context is available
25
 
26
  # Set up user loader for Flask-Login
@@ -29,7 +32,7 @@ def load_user(user_id):
29
  from .models import User
30
  return User.query.get(int(user_id))
31
 
32
- def create_app(config_class=None):
33
  logger.info("Starting application initialization...")
34
 
35
  # Create the Flask app
@@ -37,12 +40,8 @@ def create_app(config_class=None):
37
  logger.info("Flask app created")
38
 
39
  # Load configuration
40
- if config_class is None:
41
- logger.info("Loading default configuration...")
42
- app.config.from_object('config.Config')
43
- else:
44
- logger.info(f"Loading configuration from {config_class}...")
45
- app.config.from_object(config_class)
46
 
47
  # Ensure instance path exists
48
  logger.info(f"Ensuring instance path exists: {app.config['INSTANCE_PATH']}")
@@ -52,6 +51,7 @@ def create_app(config_class=None):
52
  # Initialize extensions
53
  logger.info("Initializing SQLAlchemy...")
54
  db.init_app(app)
 
55
 
56
  logger.info("Initializing LoginManager...")
57
  login.init_app(app)
@@ -64,12 +64,18 @@ def create_app(config_class=None):
64
  try:
65
  logger.info(f"Initializing Redis cache with URL: {app.config['CELERY_BROKER_URL']}")
66
  global cache
67
- cache = redis.Redis.from_url(app.config['CELERY_BROKER_URL'])
68
- logger.info("Redis cache initialized successfully")
 
 
 
 
 
69
  except Exception as e:
70
  logger.error(f"Error initializing Redis cache: {e}")
71
- # Continue without Redis for now
72
- pass
 
73
 
74
  # Register test blueprint first (this should always work)
75
  logger.info("Registering test blueprint...")
@@ -124,6 +130,14 @@ def create_app(config_class=None):
124
  except Exception as e:
125
  logger.error(f"Error registering auth blueprint: {e}")
126
 
 
 
 
 
 
 
 
 
127
  # Add a simple route directly to the app
128
  @app.route('/')
129
  def index():
 
1
  from flask import Flask
2
  from flask_sqlalchemy import SQLAlchemy
3
+ from flask_migrate import Migrate
4
  from flask_login import LoginManager
5
  from celery import Celery
6
+ from config import Config
7
  import redis
8
  import os
9
  import logging
 
20
 
21
  # Initialize extensions
22
  db = SQLAlchemy()
23
+ migrate = Migrate()
24
  login = LoginManager()
25
  login.login_view = 'auth.login'
26
+ celery = Celery(__name__, broker=Config.CELERY_BROKER_URL)
27
  cache = None # Initialize later when app context is available
28
 
29
  # Set up user loader for Flask-Login
 
32
  from .models import User
33
  return User.query.get(int(user_id))
34
 
35
+ def create_app(config_class=Config):
36
  logger.info("Starting application initialization...")
37
 
38
  # Create the Flask app
 
40
  logger.info("Flask app created")
41
 
42
  # Load configuration
43
+ logger.info(f"Loading configuration from {config_class}...")
44
+ app.config.from_object(config_class)
 
 
 
 
45
 
46
  # Ensure instance path exists
47
  logger.info(f"Ensuring instance path exists: {app.config['INSTANCE_PATH']}")
 
51
  # Initialize extensions
52
  logger.info("Initializing SQLAlchemy...")
53
  db.init_app(app)
54
+ migrate.init_app(app, db)
55
 
56
  logger.info("Initializing LoginManager...")
57
  login.init_app(app)
 
64
  try:
65
  logger.info(f"Initializing Redis cache with URL: {app.config['CELERY_BROKER_URL']}")
66
  global cache
67
+ # Only try to connect to Redis if the URL starts with 'redis://'
68
+ if app.config['CELERY_BROKER_URL'].startswith('redis://'):
69
+ cache = redis.Redis.from_url(app.config['CELERY_BROKER_URL'])
70
+ logger.info("Redis cache initialized successfully")
71
+ else:
72
+ logger.info("Not using Redis cache as broker URL is not Redis")
73
+ cache = None
74
  except Exception as e:
75
  logger.error(f"Error initializing Redis cache: {e}")
76
+ # Continue without Redis
77
+ cache = None
78
+ logger.info("Continuing without Redis cache")
79
 
80
  # Register test blueprint first (this should always work)
81
  logger.info("Registering test blueprint...")
 
130
  except Exception as e:
131
  logger.error(f"Error registering auth blueprint: {e}")
132
 
133
+ try:
134
+ logger.info("Importing and registering Facebook Ads blueprint...")
135
+ from .routes.facebook_ads import facebook_ads_bp
136
+ app.register_blueprint(facebook_ads_bp)
137
+ logger.info("Facebook Ads blueprint registered successfully")
138
+ except Exception as e:
139
+ logger.error(f"Error registering Facebook Ads blueprint: {e}")
140
+
141
  # Add a simple route directly to the app
142
  @app.route('/')
143
  def index():
app/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/app/__pycache__/__init__.cpython-312.pyc and b/app/__pycache__/__init__.cpython-312.pyc differ
 
app/models/__pycache__/facebook_ad.cpython-312.pyc ADDED
Binary file (5.78 kB). View file
 
app/models/facebook_ad.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app import db
2
+ from datetime import datetime
3
+ import uuid
4
+ import json
5
+
6
class FacebookAd(db.Model):
    """Model for storing Facebook Ads data.

    Rows are normally built from a scraper result dict via
    :meth:`from_scraper_data` and serialised for API responses via
    :meth:`to_dict`.
    """
    # Primary key: random UUID4 rendered as a 36-character string.
    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Facebook-specific fields
    ad_id = db.Column(db.String(255), nullable=True, index=True)
    advertiser = db.Column(db.String(255), nullable=True, index=True)
    advertiser_id = db.Column(db.String(255), nullable=True, index=True)

    # Content fields
    content = db.Column(db.Text, nullable=True)
    images = db.Column(db.JSON, nullable=True)  # URLs to images
    links = db.Column(db.JSON, nullable=True)  # URLs in the ad

    # Search metadata
    search_query = db.Column(db.String(255), nullable=True, index=True)
    position = db.Column(db.Integer, nullable=True)

    # Analysis results
    sentiment = db.Column(db.JSON, nullable=True)
    topics = db.Column(db.JSON, nullable=True)
    entities = db.Column(db.JSON, nullable=True)

    # Raw data for future processing
    raw_data = db.Column(db.JSON, nullable=True)
    raw_text = db.Column(db.Text, nullable=True)

    # Timestamps
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # User association
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)

    def __repr__(self):
        return f'<FacebookAd {self.id} - {self.advertiser}>'

    @classmethod
    def from_scraper_data(cls, ad_data, user_id=None):
        """Create a FacebookAd instance from scraped data.

        Args:
            ad_data: Mapping produced by the scraper; missing keys become None.
            user_id: Optional id of the user who requested the scrape.

        Returns:
            A new, not yet persisted ``FacebookAd``.
        """
        ad = cls(
            ad_id=ad_data.get('ad_id'),
            advertiser=ad_data.get('advertiser'),
            advertiser_id=ad_data.get('advertiser_id'),
            content=ad_data.get('content'),
            raw_text=ad_data.get('raw_text'),
            search_query=ad_data.get('search_query'),
            position=ad_data.get('position'),
            user_id=user_id,
        )

        # JSON fields are only set when the scraper supplied a non-empty value.
        if ad_data.get('images'):
            ad.images = ad_data['images']

        if ad_data.get('links'):
            ad.links = ad_data['links']

        # Keep everything else for future reference; images/links already
        # have dedicated columns.
        ad.raw_data = {k: v for k, v in ad_data.items() if k not in ('images', 'links')}

        return ad

    @staticmethod
    def _json_list(value):
        """Return *value* as a list, decoding it first when it is a JSON string.

        Empty values, undecodable strings and decoded values that are not
        lists all yield ``[]`` so callers can always iterate the result.
        """
        if not value:
            return []

        if isinstance(value, str):
            try:
                decoded = json.loads(value)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError; unlike a bare
                # ``except`` this does not swallow KeyboardInterrupt/SystemExit.
                return []
            return decoded if isinstance(decoded, list) else []

        return value

    def get_image_urls(self):
        """Get list of image URLs from the ad."""
        return self._json_list(self.images)

    def get_links(self):
        """Get list of links from the ad."""
        return self._json_list(self.links)

    def to_dict(self):
        """Convert the ad to a dictionary for API responses.

        Timestamps are rendered with ``isoformat()``; unset ones become None.
        """
        return {
            'id': self.id,
            'ad_id': self.ad_id,
            'advertiser': self.advertiser,
            'advertiser_id': self.advertiser_id,
            'content': self.content,
            'images': self.get_image_urls(),
            'links': self.get_links(),
            'search_query': self.search_query,
            'position': self.position,
            'sentiment': self.sentiment,
            'topics': self.topics,
            'entities': self.entities,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }
app/routes/__pycache__/facebook_ads.cpython-312.pyc ADDED
Binary file (16.5 kB). View file
 
app/routes/facebook_ads.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, render_template, request, jsonify, current_app, flash, redirect, url_for
2
+ from flask_login import login_required, current_user
3
+ from app.services.facebook_scraper import FacebookScraper
4
+ from app.models.facebook_ad import FacebookAd
5
+ from app.services.ai_processor import AIPipeline
6
+ from app import db, celery
7
+ import logging
8
+ import json
9
+ from datetime import datetime
10
+
11
+ logger = logging.getLogger(__name__)
12
+ facebook_ads_bp = Blueprint('facebook_ads', __name__, url_prefix='/facebook-ads')
13
+
14
@facebook_ads_bp.route('/', methods=['GET'])
@login_required
def index():
    """Render the Facebook Ads dashboard landing page."""
    dashboard_template = 'facebook_ads/index.html'
    return render_template(dashboard_template)
19
+
20
@facebook_ads_bp.route('/search', methods=['GET', 'POST'])
@login_required
def search():
    """Show the ad search form and, on POST, queue a scraping task.

    Form fields read on POST: ``search_query`` (required), ``num_scrolls``
    (integer, default 5) and ``country_code`` (default ``'ALL'``).

    Returns:
        The rendered ``facebook_ads/search.html`` template. After a task has
        been queued the template also receives its ``task_id``.
    """
    template = 'facebook_ads/search.html'
    if request.method != 'POST':
        return render_template(template)

    # Strip so a whitespace-only query is rejected like an empty one.
    search_query = request.form.get('search_query', '').strip()
    # type=int falls back to the default on a missing or non-numeric value
    # instead of raising ValueError, which surfaced as an HTTP 500.
    num_scrolls = request.form.get('num_scrolls', 5, type=int)
    country_code = request.form.get('country_code', 'ALL')

    if not search_query:
        flash('Please enter a search query', 'warning')
        return render_template(template)

    # Start the scraping task
    task = scrape_facebook_ads.delay(search_query, num_scrolls, country_code, current_user.id)

    flash(f'Started scraping Facebook ads for "{search_query}". This may take a few minutes.', 'info')
    return render_template(template, task_id=task.id)
40
+
41
@facebook_ads_bp.route('/page-search', methods=['GET', 'POST'])
@login_required
def page_search():
    """Show the page search form and, on POST, queue a page-scraping task.

    Form fields read on POST: ``page_name`` (required) and ``num_scrolls``
    (integer, default 5).

    Returns:
        The rendered ``facebook_ads/page_search.html`` template. After a task
        has been queued the template also receives its ``task_id``.
    """
    template = 'facebook_ads/page_search.html'
    if request.method != 'POST':
        return render_template(template)

    # Strip so a whitespace-only page name is rejected like an empty one.
    page_name = request.form.get('page_name', '').strip()
    # type=int falls back to the default on a missing or non-numeric value
    # instead of raising ValueError, which surfaced as an HTTP 500.
    num_scrolls = request.form.get('num_scrolls', 5, type=int)

    if not page_name:
        flash('Please enter a page name', 'warning')
        return render_template(template)

    # Start the scraping task
    task = scrape_facebook_page_ads.delay(page_name, num_scrolls, current_user.id)

    flash(f'Started scraping Facebook ads for page "{page_name}". This may take a few minutes.', 'info')
    return render_template(template, task_id=task.id)
60
+
61
@facebook_ads_bp.route('/results', methods=['GET'])
@login_required
def results():
    """List stored Facebook ads, optionally filtered by query string.

    Query parameters:
        query: case-insensitive substring match on the stored search query.
        advertiser: case-insensitive substring match on the advertiser name.

    Returns:
        The rendered ``facebook_ads/results.html`` template with at most the
        100 most recently created matching ads.
    """
    query = request.args.get('query', '')
    advertiser = request.args.get('advertiser', '')

    # NOTE(review): results are not restricted to current_user's ads.
    # Confirm that sharing ads across users is intended.
    ads_query = FacebookAd.query

    if query:
        ads_query = ads_query.filter(FacebookAd.search_query.ilike(f'%{query}%'))

    if advertiser:
        ads_query = ads_query.filter(FacebookAd.advertiser.ilike(f'%{advertiser}%'))

    ads = ads_query.order_by(FacebookAd.created_at.desc()).limit(100).all()

    return render_template('facebook_ads/results.html', ads=ads, query=query, advertiser=advertiser)
82
+
83
@facebook_ads_bp.route('/ad/<ad_id>', methods=['GET'])
@login_required
def view_ad(ad_id):
    """Render the detail page for one stored ad; responds 404 if it is missing."""
    selected_ad = FacebookAd.query.get_or_404(ad_id)
    detail_template = 'facebook_ads/ad_detail.html'
    return render_template(detail_template, ad=selected_ad)
89
+
90
@facebook_ads_bp.route('/advertisers', methods=['GET'])
@login_required
def advertisers():
    """Render the advertiser overview: up to 100 advertisers by ad count, descending."""
    # Count ads per advertiser name and keep the busiest ones first.
    per_advertiser = db.session.query(
        FacebookAd.advertiser,
        db.func.count(FacebookAd.id).label('ad_count'),
    ).group_by(FacebookAd.advertiser)
    ranked = per_advertiser.order_by(db.func.count(FacebookAd.id).desc())
    advertisers_data = ranked.limit(100).all()

    return render_template('facebook_ads/advertisers.html', advertisers=advertisers_data)
101
+
102
@facebook_ads_bp.route('/advertiser/<advertiser_name>', methods=['GET'])
@login_required
def advertiser_detail(advertiser_name):
    """Render all stored ads of one advertiser (exact name match), newest first."""
    matching = FacebookAd.query.filter(FacebookAd.advertiser == advertiser_name)
    newest_first = matching.order_by(FacebookAd.created_at.desc())
    ads = newest_first.all()
    return render_template('facebook_ads/advertiser_detail.html', advertiser=advertiser_name, ads=ads)
108
+
109
@facebook_ads_bp.route('/analyze/<ad_id>', methods=['GET'])
@login_required
def analyze_ad(ad_id):
    """Render the analysis page for an ad, queuing analysis if none is stored.

    Responds 404 when the ad does not exist.
    """
    ad = FacebookAd.query.get_or_404(ad_id)

    # Compare against None rather than truthiness: a neutral sentiment (0.0)
    # or an empty topic list ([]) are valid stored results, and treating them
    # as "missing" re-queued the analysis on every page view.
    if ad.sentiment is None or ad.topics is None:
        analyze_facebook_ad.delay(ad_id)
        flash('Started analyzing the ad. Refresh in a few moments to see results.', 'info')

    return render_template('facebook_ads/ad_analysis.html', ad=ad)
121
+
122
@facebook_ads_bp.route('/api/ads', methods=['GET'])
@login_required
def api_get_ads():
    """API endpoint returning stored Facebook ads as a JSON list.

    Query parameters:
        query: case-insensitive substring matched against ad content or the
            stored search query.
        advertiser: case-insensitive substring matched against the advertiser.
        limit: maximum number of ads to return (default 50).

    Returns:
        JSON array of ``FacebookAd.to_dict()`` payloads, newest first.
    """
    query = request.args.get('query', '')
    advertiser = request.args.get('advertiser', '')
    # type=int falls back to 50 on a non-numeric value instead of raising
    # ValueError (HTTP 500). max() keeps a negative LIMIT from reaching the
    # database.
    # NOTE(review): there is still no upper bound on limit; consider a cap.
    limit = max(request.args.get('limit', 50, type=int), 0)

    ads_query = FacebookAd.query

    if query:
        pattern = f'%{query}%'
        ads_query = ads_query.filter(
            (FacebookAd.content.ilike(pattern)) |
            (FacebookAd.search_query.ilike(pattern))
        )

    if advertiser:
        ads_query = ads_query.filter(FacebookAd.advertiser.ilike(f'%{advertiser}%'))

    ads = ads_query.order_by(FacebookAd.created_at.desc()).limit(limit).all()

    return jsonify([ad.to_dict() for ad in ads])
149
+
150
@facebook_ads_bp.route('/api/advertisers', methods=['GET'])
@login_required
def api_get_advertisers():
    """API endpoint returning advertisers and their ad counts as JSON.

    Query parameters:
        limit: maximum number of advertiser groups to fetch (default 50).

    Returns:
        JSON array of ``{"name": ..., "ad_count": ...}`` objects ordered by
        ad count, descending. Groups without an advertiser name are omitted.
    """
    # type=int falls back to 50 on a non-numeric value instead of raising
    # ValueError (HTTP 500). max() keeps a negative LIMIT from reaching the
    # database.
    limit = max(request.args.get('limit', 50, type=int), 0)

    # Get unique advertisers and count their ads
    advertisers_data = db.session.query(
        FacebookAd.advertiser,
        db.func.count(FacebookAd.id).label('ad_count')
    ).group_by(FacebookAd.advertiser).order_by(db.func.count(FacebookAd.id).desc()).limit(limit).all()

    # Skip the group of ads that have no advertiser name.
    result = [{"name": name, "ad_count": ad_count} for name, ad_count in advertisers_data if name]

    return jsonify(result)
166
+
167
def _store_scraped_ads(ads_data, user_id):
    """Analyse scraped ad dicts, add them to the session and commit once.

    An AI failure on a single ad is logged and that ad is stored without
    analysis results. Any other error propagates to the calling task.
    """
    ai_pipeline = AIPipeline()

    for ad_data in ads_data:
        ad = FacebookAd.from_scraper_data(ad_data, user_id)

        # Process with AI if there's content
        if ad.content:
            try:
                # process_ad only reads ``.content``, so the model instance
                # can be passed directly.
                ai_results = ai_pipeline.process_ad(ad)
                ad.sentiment = ai_results.get('sentiment')
                # Persist topics and entities as well; previously they were
                # computed and then discarded, forcing a second analysis.
                ad.topics = ai_results.get('topics')
                ad.entities = ai_results.get('entities')
            except Exception as e:
                logger.error(f"Error processing ad with AI: {e}")

        db.session.add(ad)

    db.session.commit()
    logger.info(f"Saved {len(ads_data)} Facebook ads to database")


@celery.task
def scrape_facebook_ads(search_query, num_scrolls, country_code, user_id):
    """Celery task to scrape Facebook ads for a search query and store them.

    Returns:
        ``{'status': 'success', 'count': n}`` on success, otherwise
        ``{'status': 'error', 'message': ...}`` after rolling back the session.
    """
    try:
        logger.info(f"Starting Facebook ads scraping for query: {search_query}")

        scraper = FacebookScraper()
        ads_data = scraper.scrape_ads(search_query, num_scrolls, country_code)

        logger.info(f"Scraped {len(ads_data)} Facebook ads")

        _store_scraped_ads(ads_data, user_id)

        return {'status': 'success', 'count': len(ads_data)}

    except Exception as e:
        logger.error(f"Error in Facebook ads scraping task: {e}")
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}


@celery.task
def scrape_facebook_page_ads(page_name, num_scrolls, user_id):
    """Celery task to scrape ads from a specific Facebook page and store them.

    Returns:
        ``{'status': 'success', 'count': n}`` on success, otherwise
        ``{'status': 'error', 'message': ...}`` after rolling back the session.
    """
    try:
        logger.info(f"Starting Facebook page ads scraping for page: {page_name}")

        scraper = FacebookScraper()
        ads_data = scraper.scrape_ads_by_page(page_name, num_scrolls)

        logger.info(f"Scraped {len(ads_data)} Facebook ads from page {page_name}")

        _store_scraped_ads(ads_data, user_id)

        return {'status': 'success', 'count': len(ads_data)}

    except Exception as e:
        logger.error(f"Error in Facebook page ads scraping task: {e}")
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}
262
+
263
@celery.task
def analyze_facebook_ad(ad_id):
    """Celery task to analyze a stored Facebook ad and save the results.

    Returns:
        A dict whose ``status`` is ``'success'`` (with ``ad_id``),
        ``'warning'`` when the ad has no content, or ``'error'`` (with
        ``message``) when the ad is missing or processing failed.
    """
    try:
        logger.info(f"Starting analysis for Facebook ad: {ad_id}")

        ad = FacebookAd.query.get(ad_id)

        if not ad:
            logger.error(f"Ad not found: {ad_id}")
            return {'status': 'error', 'message': 'Ad not found'}

        # Check for content before building the pipeline so no models are
        # loaded for an ad that cannot be analysed.
        if not ad.content:
            logger.warning(f"No content to analyze for ad: {ad_id}")
            return {'status': 'warning', 'message': 'No content to analyze'}

        ai_pipeline = AIPipeline()

        try:
            # process_ad only reads ``.content``, so the model instance can
            # be passed directly.
            ai_results = ai_pipeline.process_ad(ad)

            ad.sentiment = ai_results.get('sentiment')
            ad.topics = ai_results.get('topics')
            ad.entities = ai_results.get('entities')

            db.session.commit()
        except Exception as e:
            logger.error(f"Error processing ad with AI: {e}")
            # A failed commit leaves the session unusable until rolled back.
            db.session.rollback()
            return {'status': 'error', 'message': str(e)}

        logger.info(f"Successfully analyzed Facebook ad: {ad_id}")
        return {'status': 'success', 'ad_id': ad_id}

    except Exception as e:
        logger.error(f"Error in Facebook ad analysis task: {e}")
        db.session.rollback()
        return {'status': 'error', 'message': str(e)}
app/services/__pycache__/ai_processor.cpython-312.pyc CHANGED
Binary files a/app/services/__pycache__/ai_processor.cpython-312.pyc and b/app/services/__pycache__/ai_processor.cpython-312.pyc differ
 
app/services/__pycache__/facebook_scraper.cpython-312.pyc ADDED
Binary file (19.7 kB). View file
 
app/services/ai_processor.py CHANGED
@@ -1,5 +1,10 @@
1
  from pathlib import Path
2
  import logging
 
 
 
 
 
3
 
4
  logger = logging.getLogger(__name__)
5
 
@@ -9,89 +14,124 @@ class ProcessingError(Exception):
9
 
10
  class AIPipeline:
11
  def __init__(self):
12
- self.nlp = None # Initialize as None
13
- self.detector = None
14
  try:
15
- model_dir = Path("app/models")
16
- weights_path = model_dir / "yolov4.weights"
17
- config_path = model_dir / "yolov4.cfg"
18
 
19
- if not (weights_path.exists() and config_path.exists()):
20
- logger.warning("YOLOv4 files not found. Please run setup_yolo.py first.")
21
- else:
22
- # Lazy load OpenCV
23
- self._setup_detector(str(weights_path), str(config_path))
24
-
25
  except Exception as e:
26
  logger.error(f"Error initializing AI Pipeline: {e}")
27
  raise
28
 
29
- def _setup_detector(self, weights_path, config_path):
30
- """Set up the object detector with the given weights and config."""
31
- try:
32
- import cv2
33
- self.detector = cv2.dnn.readNet(weights_path, config_path)
34
- except Exception as e:
35
- logger.error(f"Error setting up detector: {e}")
36
- self.detector = None
37
-
38
- def _ensure_nlp_loaded(self):
39
- """Ensure NLP model is loaded before use."""
40
- if self.nlp is None:
41
- try:
42
- logger.info("Loading NLP model...")
43
- # Import transformers only when needed
44
- from transformers import pipeline
45
- self.nlp = pipeline("text-classification", model="roberta-base")
46
- logger.info("NLP model loaded successfully")
47
- except Exception as e:
48
- logger.error(f"Error loading NLP model: {e}")
49
- raise
50
-
51
- def process_ad(self, ad):
52
- if not ad:
53
- raise ValueError("Ad content cannot be empty")
54
-
55
  try:
56
- results = {
57
- "sentiment": self._analyze_sentiment(ad.content),
58
- "ocr": self._extract_ocr(ad.media) if hasattr(ad, 'media') else None,
59
- "objects": self._detect_objects(ad.media) if hasattr(ad, 'media') else None
60
- }
61
- return results
 
 
 
 
 
 
 
 
 
 
 
62
  except Exception as e:
63
- logger.error(f"Error processing ad: {str(e)}")
64
- raise ProcessingError(f"Failed to process ad: {str(e)}")
65
 
66
- def _analyze_sentiment(self, text):
67
- if not text:
68
- return None
69
  try:
70
- self._ensure_nlp_loaded() # Load model if needed
71
- return self.nlp(text)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  except Exception as e:
73
- logger.error(f"Sentiment analysis error: {e}")
74
- return None
75
 
76
- def _extract_ocr(self, media):
77
- if not media or not hasattr(media, 'type') or media.type != "image":
78
- return None
79
  try:
80
- import pytesseract
81
- return pytesseract.image_to_string(media.path)
 
 
 
 
 
 
 
 
 
 
82
  except Exception as e:
83
- logger.error(f"OCR error: {e}")
84
- return None
85
 
86
- def _detect_objects(self, media):
87
- if not media or not hasattr(media, 'type') or media.type != "image" or not self.detector:
88
- return None
89
  try:
90
- import cv2
91
- img = cv2.imread(media.path)
92
- blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), swapRB=True, crop=False)
93
- self.detector.setInput(blob)
94
- return self.detector.forward()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  except Exception as e:
96
- logger.error(f"Object detection error: {e}")
97
- return None
 
 
 
 
 
1
  from pathlib import Path
2
  import logging
3
+ import json
4
+ from transformers import pipeline
5
+ from textblob import TextBlob
6
+ import spacy
7
+ import re
8
 
9
  logger = logging.getLogger(__name__)
10
 
 
14
 
15
class AIPipeline:
    """Text-analysis pipeline for ads: sentiment, topics and named entities."""

    def __init__(self):
        """Initialize the AI pipeline with necessary models.

        Raises:
            Exception: whatever the model loaders raise, re-raised after
                being logged.
        """
        try:
            # Load spaCy model for NER and topic extraction
            self.nlp = spacy.load('en_core_web_sm')

            # Initialize sentiment analyzer
            self.sentiment = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

            logger.info("AI Pipeline initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing AI Pipeline: {e}")
            raise

    def _analyze_sentiment(self, text: str) -> float:
        """Analyze sentiment of text and return a score between -1 and 1.

        The score is the mean of the signed transformer confidence and the
        TextBlob polarity. Returns 0.0 when analysis fails.
        """
        try:
            # truncation=True keeps texts longer than the model's maximum
            # sequence length from raising; such ads previously fell through
            # to the except branch and were scored 0.0.
            result = self.sentiment(text, truncation=True)[0]

            # Convert POSITIVE/NEGATIVE to a signed float
            score = result['score'] if result['label'] == 'POSITIVE' else -result['score']

            # Use TextBlob for additional nuance
            blob_score = TextBlob(text).sentiment.polarity

            # Average the scores
            return (score + blob_score) / 2
        except Exception as e:
            logger.error(f"Error in sentiment analysis: {e}")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Extract up to five main topics from text.

        Candidates are noun chunks plus ORG/PRODUCT/EVENT/WORK_OF_ART
        entities, lower-cased and stripped of punctuation. They are ranked by
        how often they occur (ties broken alphabetically), so the five that
        are kept are the most frequent ones rather than the alphabetically
        first. Returns [] when extraction fails.
        """
        from collections import Counter

        try:
            doc = self.nlp(text)

            # Extract noun phrases as potential topics
            candidates = [chunk.text.lower() for chunk in doc.noun_chunks]

            # Extract named entities that might be topics
            candidates += [ent.text.lower() for ent in doc.ents
                           if ent.label_ in ('ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART')]

            ignored = {'the', 'this', 'that', 'these', 'those'}
            counts = Counter()
            for topic in candidates:
                # Remove special characters and extra whitespace
                topic = re.sub(r'[^\w\s]', '', topic)
                topic = ' '.join(topic.split())

                # Filter out short or common words
                if len(topic) > 3 and topic not in ignored:
                    counts[topic] += 1

            ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
            return [topic for topic, _ in ranked[:5]]
        except Exception as e:
            logger.error(f"Error in topic extraction: {e}")
            return []

    def _extract_entities(self, text: str) -> list:
        """Extract named entities from text.

        Returns:
            A list of ``{'text', 'type', 'description'}`` dicts, one per
            entity found by spaCy, or [] when extraction fails.
        """
        try:
            doc = self.nlp(text)
            return [
                {
                    'text': ent.text,
                    'type': ent.label_,
                    'description': spacy.explain(ent.label_),
                }
                for ent in doc.ents
            ]
        except Exception as e:
            logger.error(f"Error in entity extraction: {e}")
            return []

    def process_ad(self, ad) -> dict:
        """Process an ad and return analysis results.

        Args:
            ad: Any object with a ``content`` attribute holding the ad text.

        Returns:
            Dict with ``sentiment`` (float), ``topics`` (list) and
            ``entities`` (list). Neutral/empty values are returned when the
            ad has no content or processing fails.
        """
        try:
            # Ensure we have content to analyze
            content = getattr(ad, 'content', None)
            if not content:
                return {
                    'sentiment': 0.0,
                    'topics': [],
                    'entities': []
                }

            return {
                'sentiment': self._analyze_sentiment(content),
                'topics': self._extract_topics(content),
                'entities': self._extract_entities(content)
            }
        except Exception as e:
            logger.error(f"Error in ad processing: {e}")
            return {
                'sentiment': 0.0,
                'topics': [],
                'entities': []
            }
app/services/facebook_scraper.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from selenium.webdriver.common.by import By
3
+ from selenium.webdriver.chrome.service import Service
4
+ from selenium.webdriver.support.ui import WebDriverWait
5
+ from selenium.webdriver.support import expected_conditions as EC
6
+ from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ import time
9
+ import json
10
+ import logging
11
+ import re
12
+ from datetime import datetime
13
+ from contextlib import contextmanager
14
+ from typing import List, Dict, Any, Optional
15
+ from bs4 import BeautifulSoup
16
+ from urllib.parse import urlparse, parse_qs
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
class FacebookScraper:
    """
    Enhanced Facebook Ads Library scraper with improved robustness and features.

    Every public ``scrape_*`` method opens its own Chrome session through
    ``_get_driver`` and closes it again before returning, so at most one
    browser is alive per scraper instance at any time.
    """

    # Base address of the Ads Library; query parameters are appended by
    # ``_build_library_url``.
    ADS_LIBRARY_URL = "https://www.facebook.com/ads/library/"

    # CSS selectors tried in order when locating ad cards. The page structure
    # may change, so the first selector that matches anything wins.
    AD_SELECTORS = (
        "div.x1yztbdb",          # Current selector
        "div[role='article']",   # Alternative selector
        "div.x1iorvi4",          # Another possible selector
    )

    # A standalone ``id`` query parameter. Requiring a leading ``?`` or ``&``
    # keeps ``view_all_page_id=...`` from being mistaken for an ad ID.
    _AD_ID_RE = re.compile(r"[?&]id=(\d+)")

    # Counter text such as "12 ads" or "1,234 ads".
    _AD_COUNT_RE = re.compile(r"(\d[\d,]*)\s+ads")

    def __init__(self, headless: bool = True, timeout: int = 10, use_proxy: bool = False, proxy: Optional[str] = None):
        """
        Initialize the Facebook scraper with configurable options.

        Args:
            headless: Whether to run the browser in headless mode
            timeout: Default timeout for waiting operations in seconds
            use_proxy: Whether to use a proxy
            proxy: Proxy server address (e.g., "http://user:pass@ip:port")
        """
        self.driver = None
        self.headless = headless
        self.timeout = timeout
        self.use_proxy = use_proxy
        self.proxy = proxy

    @classmethod
    def _build_library_url(cls, **params) -> str:
        """
        Build an Ads Library URL with properly encoded query parameters.

        ``active_status=all`` and ``ad_type=all`` are always sent first; the
        keyword arguments follow in the order given. Values are URL-encoded so
        that search terms containing spaces, ``&`` or ``#`` stay intact.
        """
        # Local import: the surrounding module only imports urlparse/parse_qs.
        from urllib.parse import urlencode

        query = {"active_status": "all", "ad_type": "all", **params}
        return f"{cls.ADS_LIBRARY_URL}?{urlencode(query)}"

    def _setup_driver(self):
        """Configure and initialize the Chrome WebDriver with optimal settings."""
        options = webdriver.ChromeOptions()

        if self.headless:
            options.add_argument("--headless")

        # Add common options for stability
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-gpu")
        options.add_argument("--window-size=1920,1080")

        # Add user agent to appear more like a regular browser
        options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36")

        # Add proxy if specified
        if self.use_proxy and self.proxy:
            options.add_argument(f'--proxy-server={self.proxy}')

        # Disable automation flags to avoid detection
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)

        return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    @contextmanager
    def _get_driver(self):
        """
        Context manager for a browser session that guarantees cleanup.

        The session created here is tracked in a local variable, so the
        ``finally`` clause always quits exactly the browser this call started,
        even if ``self.driver`` was reassigned in the meantime. On exit
        ``self.driver`` is restored to whatever it was before.

        Raises:
            Exception: Whatever ``_setup_driver`` raised (after logging it).
                Errors raised inside the ``with`` body propagate unchanged.
        """
        previous = self.driver
        try:
            driver = self._setup_driver()
        except Exception as e:
            logger.error(f"Error initializing WebDriver: {e}")
            raise

        self.driver = driver
        try:
            yield driver
        finally:
            self.driver = previous
            try:
                driver.quit()
            except WebDriverException as e:
                # A failed shutdown must not mask an error from the with-body.
                logger.warning(f"Error closing WebDriver: {e}")

    def _wait_for_element(self, driver, selector: str, by: By = By.CSS_SELECTOR, timeout: Optional[int] = None) -> Any:
        """
        Wait for an element to be present and return it.

        Args:
            driver: WebDriver instance
            selector: Element selector
            by: Selector type (CSS, XPATH, etc.)
            timeout: Wait timeout in seconds; defaults to ``self.timeout``

        Returns:
            The found web element

        Raises:
            TimeoutException: If the element does not appear in time.
        """
        if timeout is None:
            timeout = self.timeout

        wait = WebDriverWait(driver, timeout)
        return wait.until(EC.presence_of_element_located((by, selector)))

    def _wait_for_elements(self, driver, selector: str, by: By = By.CSS_SELECTOR, timeout: Optional[int] = None) -> List[Any]:
        """
        Wait for elements to be present and return them.

        Args:
            driver: WebDriver instance
            selector: Elements selector
            by: Selector type (CSS, XPATH, etc.)
            timeout: Wait timeout in seconds; defaults to ``self.timeout``

        Returns:
            List of found web elements

        Raises:
            TimeoutException: If no matching element appears in time.
        """
        if timeout is None:
            timeout = self.timeout

        wait = WebDriverWait(driver, timeout)
        return wait.until(EC.presence_of_all_elements_located((by, selector)))

    def _wait_for_main(self, driver, timeout_warning: str) -> None:
        """
        Wait for the page's main container, logging instead of failing.

        A timeout is only logged (with ``timeout_warning``) so that scraping
        can still proceed with whatever content is available.
        """
        try:
            self._wait_for_element(driver, "div[role='main']")
        except TimeoutException:
            logger.warning(timeout_warning)

    def _scroll_to_load_more(self, driver, scroll_count: int = 5, scroll_pause: float = 2.0):
        """
        Scroll down the page to load more content.

        Args:
            driver: WebDriver instance
            scroll_count: Number of times to scroll
            scroll_pause: Pause between scrolls in seconds
        """
        for i in range(scroll_count):
            # Scroll down to bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Wait to load page
            time.sleep(scroll_pause)

            # Log progress
            logger.debug(f"Completed scroll {i+1}/{scroll_count}")

    def _find_ad_elements(self, driver) -> List[Any]:
        """
        Locate ad cards on the current page.

        Tries each entry of ``AD_SELECTORS`` in order and returns the elements
        of the first selector that matches anything, or an empty list.
        """
        for selector in self.AD_SELECTORS:
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception as e:
                logger.debug(f"Selector {selector} failed: {e}")
                continue
            if elements:
                logger.info(f"Found {len(elements)} ads using selector: {selector}")
                return elements
        return []

    def _collect_ads(self, ad_elements, extra: Optional[Dict[str, Any]] = None,
                     include_position: bool = True, label: str = "ad") -> List[Dict[str, Any]]:
        """
        Extract details from every ad element, skipping the ones that fail.

        Args:
            ad_elements: Web elements, one per ad
            extra: Key/value pairs added to every extracted ad
            include_position: Whether to record the 1-based ``position``
            label: Noun used in the error log for a failed element

        Returns:
            List of ad dictionaries in page order
        """
        ads_data = []
        for position, ad_element in enumerate(ad_elements, start=1):
            try:
                ad_data = self._extract_ad_details(ad_element)
            except Exception as e:
                # One broken card must not abort the whole scrape.
                logger.error(f"Error processing {label} {position}: {e}")
                continue
            if include_position:
                ad_data["position"] = position
            if extra:
                ad_data.update(extra)
            ads_data.append(ad_data)
        return ads_data

    def _extract_ad_details(self, ad_element) -> Dict[str, Any]:
        """
        Extract detailed information from an ad element.

        Args:
            ad_element: WebElement containing the ad

        Returns:
            Dictionary with ad details. ``scrape_time``, ``platform`` and
            ``raw_text`` are always present; ``advertiser``, ``content``,
            ``images``, ``links`` and ``ad_id`` only when they could be found.
        """
        ad_data = {
            "scrape_time": datetime.now().isoformat(),
            "platform": "facebook",
            "raw_text": ad_element.text
        }

        try:
            # Try to extract advertiser name
            advertiser_elem = ad_element.find_elements(By.CSS_SELECTOR, "span[dir='auto']")
            if advertiser_elem:
                ad_data["advertiser"] = advertiser_elem[0].text

            # Try to extract ad content
            content_elem = ad_element.find_elements(By.CSS_SELECTOR, "div[dir='auto']")
            if content_elem:
                ad_data["content"] = "\n".join([elem.text for elem in content_elem])

            # Try to extract images (each attribute is fetched only once)
            img_elems = ad_element.find_elements(By.TAG_NAME, "img")
            if img_elems:
                sources = (img.get_attribute("src") for img in img_elems)
                ad_data["images"] = [src for src in sources if src]

            # Try to extract links
            link_elems = ad_element.find_elements(By.TAG_NAME, "a")
            if link_elems:
                hrefs = (link.get_attribute("href") for link in link_elems)
                ad_data["links"] = [href for href in hrefs if href]

            # Try to extract ad ID from the first link that carries one
            for link in ad_data.get("links", []):
                id_match = self._AD_ID_RE.search(link)
                if id_match:
                    ad_data["ad_id"] = id_match.group(1)
                    break

        except Exception as e:
            # Best effort: keep whatever was extracted before the failure.
            logger.warning(f"Error extracting ad details: {e}")

        return ad_data

    def scrape_ads(self, search_query: str, num_scrolls: int = 5, country_code: str = "ALL") -> List[Dict[str, Any]]:
        """
        Scrape ads from Facebook Ads Library based on a search query.

        Args:
            search_query: Keyword to search for
            num_scrolls: Number of times to scroll to load more ads
            country_code: Country code filter (e.g., "US", "GB", "ALL")

        Returns:
            List of dictionaries containing ad information; empty when no ads
            were found or a WebDriver error occurred.
        """
        with self._get_driver() as driver:
            try:
                # Construct URL with (encoded) parameters
                url = self._build_library_url(country=country_code, q=search_query, search_type="keyword")
                logger.info(f"Accessing Facebook Ads Library: {url}")

                # Navigate to the URL
                driver.get(url)

                # Wait for initial content to load
                self._wait_for_main(driver, "Timeout waiting for main content to load")

                # Scroll to load more ads
                self._scroll_to_load_more(driver, num_scrolls)

                # Find all ad elements
                ad_elements = self._find_ad_elements(driver)
                if not ad_elements:
                    logger.warning("No ad elements found with any selector")
                    return []

                # Extract detailed information from each ad
                ads_data = self._collect_ads(ad_elements, extra={"search_query": search_query})

                logger.info(f"Successfully scraped {len(ads_data)} ads")
                return ads_data

            except (TimeoutException, WebDriverException) as e:
                logger.error(f"Error during scraping: {e}")
                return []

    def scrape_advertiser_details(self, advertiser_id: str) -> Dict[str, Any]:
        """
        Scrape details about a specific advertiser.

        Args:
            advertiser_id: Facebook ID of the advertiser

        Returns:
            Dictionary with advertiser information. On failure the dictionary
            holds only ``id`` and ``error``.
        """
        with self._get_driver() as driver:
            try:
                url = self._build_library_url(country="ALL", view_all_page_id=advertiser_id)
                logger.info(f"Accessing advertiser page: {url}")

                driver.get(url)

                # Wait for page to load
                self._wait_for_main(driver, "Timeout waiting for advertiser page to load")

                # Extract advertiser information
                advertiser_data = {
                    "id": advertiser_id,
                    "scrape_time": datetime.now().isoformat()
                }

                # Try to get advertiser name (optional field)
                try:
                    name_elem = self._wait_for_element(driver, "div[role='main'] h1", timeout=5)
                    advertiser_data["name"] = name_elem.text
                except WebDriverException as e:
                    logger.debug(f"Advertiser name not available: {e}")

                # Try to get ad count (optional field)
                try:
                    count_text = driver.find_element(By.XPATH, "//div[contains(text(), 'ads')]").text
                except WebDriverException as e:
                    logger.debug(f"Ad count not available: {e}")
                else:
                    count_match = self._AD_COUNT_RE.search(count_text)
                    if count_match:
                        # Drop thousands separators: "1,234 ads" -> 1234
                        advertiser_data["ad_count"] = int(count_match.group(1).replace(",", ""))

                # Scroll to load some ads
                self._scroll_to_load_more(driver, 3)

                # Get up to 5 sample ads
                sample_ads = self._collect_ads(
                    self._find_ad_elements(driver)[:5],
                    include_position=False,
                    label="sample ad",
                )

                advertiser_data["sample_ads"] = sample_ads
                advertiser_data["sample_ad_count"] = len(sample_ads)

                return advertiser_data

            except Exception as e:
                logger.error(f"Error scraping advertiser details: {e}")
                return {"id": advertiser_id, "error": str(e)}

    def scrape_ads_by_topic(self, topic: str, num_scrolls: int = 5, country_code: str = "ALL") -> List[Dict[str, Any]]:
        """
        Scrape ads related to a specific topic.

        Args:
            topic: Topic to search for (e.g., "politics", "health", "finance")
            num_scrolls: Number of times to scroll to load more ads
            country_code: Country code filter

        Returns:
            List of dictionaries containing ad information
        """
        # This is essentially the same as scrape_ads but with a different name for clarity
        return self.scrape_ads(topic, num_scrolls, country_code)

    def _find_page_id(self, driver) -> Optional[str]:
        """Return the page ID from the first page-result link, or None."""
        page_links = driver.find_elements(By.CSS_SELECTOR, "a[href*='view_all_page_id=']")
        if not page_links:
            return None

        href = page_links[0].get_attribute("href") or ""
        page_id_match = re.search(r'view_all_page_id=(\d+)', href)
        return page_id_match.group(1) if page_id_match else None

    def _scrape_page_ads(self, driver, page_name: str, num_scrolls: int) -> Optional[List[Dict[str, Any]]]:
        """
        Look up ``page_name`` and scrape that page's ads.

        Returns:
            The scraped ads, or None when the page could not be resolved (or
            resolving/scraping it failed) and the caller should fall back.
        """
        # First, try to find the page ID
        search_url = self._build_library_url(country="ALL", q=page_name, search_type="page")
        logger.info(f"Searching for page: {search_url}")

        driver.get(search_url)

        # Wait for search results
        self._wait_for_main(driver, "Timeout waiting for page search results")

        try:
            page_id = self._find_page_id(driver)
            if page_id is None:
                return None
            logger.info(f"Found page ID: {page_id}")

            # Navigate directly to page's ads
            driver.get(self._build_library_url(country="ALL", view_all_page_id=page_id))

            # Wait for page to load
            self._wait_for_main(driver, "Timeout waiting for page ads to load")

            # Scroll to load more ads
            self._scroll_to_load_more(driver, num_scrolls)

            # Extract detailed information from each ad
            ads_data = self._collect_ads(
                self._find_ad_elements(driver),
                extra={"page_name": page_name, "page_id": page_id},
            )

            logger.info(f"Successfully scraped {len(ads_data)} ads from page {page_name}")
            return ads_data
        except Exception as e:
            logger.error(f"Error finding page: {e}")
            return None

    def scrape_ads_by_page(self, page_name: str, num_scrolls: int = 5) -> List[Dict[str, Any]]:
        """
        Scrape ads from a specific Facebook page.

        Args:
            page_name: Name of the Facebook page
            num_scrolls: Number of times to scroll to load more ads

        Returns:
            List of dictionaries containing ad information. If the page cannot
            be resolved, the result of a keyword search for ``page_name`` is
            returned instead; on any other error the list is empty.
        """
        with self._get_driver() as driver:
            try:
                page_ads = self._scrape_page_ads(driver, page_name, num_scrolls)
            except Exception as e:
                logger.error(f"Error during page scraping: {e}")
                return []

        if page_ads is not None:
            return page_ads

        # The fallback runs only after the browser above has been closed:
        # scrape_ads opens its own session, and nesting the two used to leave
        # the outer browser running.
        logger.warning(f"Could not find page {page_name}, falling back to keyword search")
        try:
            return self.scrape_ads(page_name, num_scrolls)
        except Exception as e:
            logger.error(f"Error during page scraping: {e}")
            return []
app/templates/base.html CHANGED
@@ -3,11 +3,12 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>{% block title %}Facebook Ad Analytics{% endblock %}</title>
7
  <!-- Bootstrap CSS -->
8
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
9
  <!-- Custom CSS -->
10
  <link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
 
11
  {% block head_extra %}{% endblock %}
12
  </head>
13
  <body>
@@ -26,7 +27,14 @@
26
  <a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
27
  </li>
28
  <li class="nav-item">
29
- <a class="nav-link" href="{{ url_for('compliance.compliance_report') }}">Compliance</a>
 
 
 
 
 
 
 
30
  </li>
31
  </ul>
32
  <ul class="navbar-nav">
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{% block title %}Facebook Ad Analytics{% endblock %} - Ad Analysis Tool</title>
7
  <!-- Bootstrap CSS -->
8
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
9
  <!-- Custom CSS -->
10
  <link rel="stylesheet" href="{{ url_for('static', filename='css/styles.css') }}">
11
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
12
  {% block head_extra %}{% endblock %}
13
  </head>
14
  <body>
 
27
  <a class="nav-link" href="{{ url_for('google_ads.index') }}">Google Ads</a>
28
  </li>
29
  <li class="nav-item">
30
+ <a class="nav-link" href="{{ url_for('facebook_ads.index') }}">
31
+ <i class="fab fa-facebook"></i> Facebook Ads
32
+ </a>
33
+ </li>
34
+ <li class="nav-item">
35
+ <a class="nav-link" href="{{ url_for('compliance.compliance_report') }}">
36
+ <i class="fas fa-check-circle"></i> Compliance
37
+ </a>
38
  </li>
39
  </ul>
40
  <ul class="navbar-nav">
app/templates/facebook_ads/ad_analysis.html ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{#
  Analysis page for a single ad.
  Reads from `ad`: id, advertiser, content, sentiment (None until analysed;
  the page itself documents the range as -1..+1), topics (iterable of
  strings) and entities (items with text / type / description).
#}

{% block title %}Ad Analysis - {{ ad.advertiser }}{% endblock %}

{% block content %}
<div class="container mt-4">
    <nav aria-label="breadcrumb">
        <ol class="breadcrumb">
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.results') }}">Results</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}">Ad Details</a></li>
            <li class="breadcrumb-item active" aria-current="page">Analysis</li>
        </ol>
    </nav>

    <div class="card mb-4">
        <div class="card-header">
            <h2 class="mb-0">Ad Analysis</h2>
        </div>
        <div class="card-body">
            <div class="row mb-4">
                <div class="col-md-12">
                    <h4>Original Content</h4>
                    <p class="lead">{{ ad.content }}</p>
                </div>
            </div>

            <div class="row">
                <div class="col-md-6">
                    <div class="card mb-4">
                        <div class="card-body">
                            <h4>Sentiment Analysis</h4>
                            {# `is not none`, not truthiness: a neutral score of 0.0 is a valid result. #}
                            {% if ad.sentiment is not none %}
                            <div class="progress mb-3">
                                {# Map the -1..+1 score onto a 0..100% bar width. #}
                                {% set sentiment_percent = ((ad.sentiment + 1) / 2) * 100 %}
                                <div class="progress-bar bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}"
                                     role="progressbar"
                                     style="width: {{ sentiment_percent }}%"
                                     aria-valuenow="{{ sentiment_percent }}"
                                     aria-valuemin="0"
                                     aria-valuemax="100">
                                    {{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
                                </div>
                            </div>
                            <p>
                                <strong>Score:</strong> {{ "%.2f"|format(ad.sentiment) }}<br>
                                <small class="text-muted">
                                    Scores range from -1 (very negative) to +1 (very positive)
                                </small>
                            </p>
                            {% else %}
                            <div class="alert alert-info">
                                Sentiment analysis is in progress...
                            </div>
                            {% endif %}
                        </div>
                    </div>
                </div>

                <div class="col-md-6">
                    <div class="card mb-4">
                        <div class="card-body">
                            <h4>Topics</h4>
                            {% if ad.topics %}
                            <div class="mb-3">
                                {% for topic in ad.topics %}
                                <span class="badge bg-info me-2 mb-2">{{ topic }}</span>
                                {% endfor %}
                            </div>
                            <small class="text-muted">
                                Topics are extracted using natural language processing
                            </small>
                            {% else %}
                            <div class="alert alert-info">
                                Topic analysis is in progress...
                            </div>
                            {% endif %}
                        </div>
                    </div>
                </div>
            </div>

            {% if ad.entities %}
            <div class="card mb-4">
                <div class="card-body">
                    <h4>Named Entities</h4>
                    <div class="table-responsive">
                        <table class="table table-striped">
                            <thead>
                                <tr>
                                    <th>Entity</th>
                                    <th>Type</th>
                                    <th>Description</th>
                                </tr>
                            </thead>
                            <tbody>
                                {% for entity in ad.entities %}
                                <tr>
                                    <td>{{ entity.text }}</td>
                                    <td><span class="badge bg-secondary">{{ entity.type }}</span></td>
                                    <td>{{ entity.description or 'N/A' }}</td>
                                </tr>
                                {% endfor %}
                            </tbody>
                        </table>
                    </div>
                    <small class="text-muted">
                        Named entities are important words or phrases that represent specific concepts
                    </small>
                </div>
            </div>
            {% endif %}

            {# Same None check as the sentiment card above: `not ad.sentiment`
               would keep this banner up forever for a neutral (0.0) score. #}
            {% if ad.sentiment is none or not ad.topics %}
            <div class="alert alert-warning">
                <h4 class="alert-heading">Analysis in Progress</h4>
                <p>The ad content is being analyzed. This process may take a few moments. Please refresh the page to see updated results.</p>
            </div>
            {% endif %}
        </div>
    </div>
</div>
{% endblock %}
app/templates/facebook_ads/ad_detail.html ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{#
  Detail page for a single ad.
  Reads from `ad`: id, advertiser, content, image_urls, links, sentiment
  (None until analysed), topics, entities, created_at and search_query.
#}

{% block title %}Facebook Ad Details{% endblock %}

{% block content %}
<div class="container mt-4">
    <nav aria-label="breadcrumb">
        <ol class="breadcrumb">
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.results') }}">Results</a></li>
            <li class="breadcrumb-item active" aria-current="page">Ad Details</li>
        </ol>
    </nav>

    <div class="card mb-4">
        <div class="card-header">
            <h2 class="mb-0">
                {# The advertiser link needs a non-empty name to build its URL;
                   ads scraped without one get a plain label instead. #}
                {% if ad.advertiser %}
                <a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=ad.advertiser) }}">
                    {{ ad.advertiser }}
                </a>
                {% else %}
                Unknown advertiser
                {% endif %}
            </h2>
        </div>
        <div class="card-body">
            {% if ad.image_urls %}
            <div class="row mb-4">
                {% for image_url in ad.image_urls %}
                <div class="col-md-6 mb-3">
                    <img src="{{ image_url }}" class="img-fluid rounded" alt="Ad Image {{ loop.index }}">
                </div>
                {% endfor %}
            </div>
            {% endif %}

            <div class="mb-4">
                <h4>Ad Content</h4>
                <p class="lead">{{ ad.content }}</p>
            </div>

            {% if ad.links %}
            <div class="mb-4">
                <h4>Links</h4>
                <ul class="list-group">
                    {% for link in ad.links %}
                    <li class="list-group-item">
                        <a href="{{ link }}" target="_blank" rel="noopener noreferrer">{{ link }}</a>
                    </li>
                    {% endfor %}
                </ul>
            </div>
            {% endif %}

            <div class="row">
                <div class="col-md-6">
                    {# `is not none`, not truthiness: a neutral score of 0.0 is a valid result. #}
                    {% if ad.sentiment is not none %}
                    <div class="mb-4">
                        <h4>Sentiment Analysis</h4>
                        <div class="progress">
                            {# Map the -1..+1 score onto a 0..100% bar width. #}
                            {% set sentiment_percent = ((ad.sentiment + 1) / 2) * 100 %}
                            <div class="progress-bar bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}"
                                 role="progressbar"
                                 style="width: {{ sentiment_percent }}%"
                                 aria-valuenow="{{ sentiment_percent }}"
                                 aria-valuemin="0"
                                 aria-valuemax="100">
                                {{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
                            </div>
                        </div>
                        <small class="text-muted">Score: {{ "%.2f"|format(ad.sentiment) }}</small>
                    </div>
                    {% endif %}
                </div>

                <div class="col-md-6">
                    {% if ad.topics %}
                    <div class="mb-4">
                        <h4>Topics</h4>
                        {% for topic in ad.topics %}
                        <span class="badge bg-info me-2 mb-2">{{ topic }}</span>
                        {% endfor %}
                    </div>
                    {% endif %}
                </div>
            </div>

            {% if ad.entities %}
            <div class="mb-4">
                <h4>Entities</h4>
                <div class="table-responsive">
                    <table class="table table-striped">
                        <thead>
                            <tr>
                                <th>Entity</th>
                                <th>Type</th>
                            </tr>
                        </thead>
                        <tbody>
                            {% for entity in ad.entities %}
                            <tr>
                                <td>{{ entity.text }}</td>
                                <td><span class="badge bg-secondary">{{ entity.type }}</span></td>
                            </tr>
                            {% endfor %}
                        </tbody>
                    </table>
                </div>
            </div>
            {% endif %}

            {# Offer analysis only while results are missing. A sentiment of 0.0
               counts as analysed, matching the sentiment bar above. #}
            {% if ad.sentiment is none or not ad.topics %}
            <div class="mt-4">
                <a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-primary">
                    Analyze Ad Content
                </a>
            </div>
            {% endif %}
        </div>
        <div class="card-footer text-muted">
            <div class="row">
                <div class="col-md-6">
                    Scraped: {{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') }}
                </div>
                <div class="col-md-6 text-end">
                    Search Query: {{ ad.search_query or 'N/A' }}
                </div>
            </div>
        </div>
    </div>
</div>
{% endblock %}
app/templates/facebook_ads/advertiser_detail.html ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{#
  Overview page for one advertiser.
  Context: `advertiser` (display name) and `ads` (list of ad records with
  id, content, image_urls, sentiment, topics and created_at).
  NOTE(review): the "First Ad" / "Latest Ad" lines assume `ads` is ordered
  newest first - confirm against the view that supplies it.
#}

{% block title %}{{ advertiser }} - Facebook Ads{% endblock %}

{% block content %}
<div class="container mt-4">
    <nav aria-label="breadcrumb">
        <ol class="breadcrumb">
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.index') }}">Dashboard</a></li>
            <li class="breadcrumb-item"><a href="{{ url_for('facebook_ads.advertisers') }}">Advertisers</a></li>
            <li class="breadcrumb-item active" aria-current="page">{{ advertiser }}</li>
        </ol>
    </nav>

    <div class="row mb-4">
        <div class="col">
            <h1>{{ advertiser }}</h1>
        </div>
        <div class="col-auto">
            {# page_name goes through url_for so it is URL-encoded; names containing
               spaces, "&" or "#" would otherwise corrupt the query string. #}
            <a href="{{ url_for('facebook_ads.page_search', page_name=advertiser) }}"
               class="btn btn-primary">
                Scrape More Ads
            </a>
        </div>
    </div>

    {% if ads %}
    <div class="row">
        <div class="col-md-4 mb-4">
            <div class="card">
                <div class="card-body">
                    <h5 class="card-title">Statistics</h5>
                    <ul class="list-unstyled">
                        <li>Total Ads: {{ ads|length }}</li>
                        <li>First Ad: {{ ads[-1].created_at.strftime('%Y-%m-%d') }}</li>
                        <li>Latest Ad: {{ ads[0].created_at.strftime('%Y-%m-%d') }}</li>
                    </ul>
                </div>
            </div>
        </div>

        <div class="col-md-8 mb-4">
            <div class="card">
                <div class="card-body">
                    <h5 class="card-title">Sentiment Overview</h5>
                    {# namespace() objects are needed because plain `set` inside a
                       for loop does not survive past the loop body. #}
                    {% set positive = namespace(count=0) %}
                    {% set negative = namespace(count=0) %}
                    {% set neutral = namespace(count=0) %}
                    {% for ad in ads %}
                        {% if ad.sentiment is not none %}
                            {% if ad.sentiment > 0 %}
                                {% set positive.count = positive.count + 1 %}
                            {% elif ad.sentiment < 0 %}
                                {% set negative.count = negative.count + 1 %}
                            {% else %}
                                {% set neutral.count = neutral.count + 1 %}
                            {% endif %}
                        {% endif %}
                    {% endfor %}

                    <div class="progress">
                        {% set total = positive.count + negative.count + neutral.count %}
                        {# Skip the bars entirely when nothing has been analysed (avoids dividing by zero). #}
                        {% if total > 0 %}
                        <div class="progress-bar bg-success" role="progressbar"
                             style="width: {{ (positive.count / total * 100)|round }}%">
                            {{ positive.count }}
                        </div>
                        <div class="progress-bar bg-secondary" role="progressbar"
                             style="width: {{ (neutral.count / total * 100)|round }}%">
                            {{ neutral.count }}
                        </div>
                        <div class="progress-bar bg-danger" role="progressbar"
                             style="width: {{ (negative.count / total * 100)|round }}%">
                            {{ negative.count }}
                        </div>
                        {% endif %}
                    </div>
                    <div class="mt-2">
                        <small class="text-muted">
                            Positive: {{ positive.count }},
                            Neutral: {{ neutral.count }},
                            Negative: {{ negative.count }}
                        </small>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <div class="row">
        {% for ad in ads %}
        <div class="col-md-6 mb-4">
            <div class="card h-100">
                <div class="card-body">
                    {% if ad.image_urls %}
                    <div class="mb-3">
                        <img src="{{ ad.image_urls[0] }}" class="img-fluid rounded" alt="Ad Image">
                    </div>
                    {% endif %}

                    {# Content may be missing; fall back to an empty preview
                       rather than failing on the slice / length. #}
                    {% set ad_text = ad.content or '' %}
                    <p class="card-text">{{ ad_text[:200] }}{% if ad_text|length > 200 %}...{% endif %}</p>

                    {# `is not none`, not truthiness: a neutral score of 0.0 must
                       still show its "Neutral" badge. #}
                    {% if ad.sentiment is not none %}
                    <div class="mb-2">
                        <strong>Sentiment:</strong>
                        <span class="badge bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}">
                            {{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
                        </span>
                    </div>
                    {% endif %}

                    {% if ad.topics %}
                    <div class="mb-2">
                        <strong>Topics:</strong>
                        {% for topic in ad.topics %}
                        <span class="badge bg-info me-1">{{ topic }}</span>
                        {% endfor %}
                    </div>
                    {% endif %}

                    <div class="mt-3">
                        <a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}" class="btn btn-primary btn-sm">View Details</a>
                        {# Offer analysis only while results are missing (0.0 counts as analysed). #}
                        {% if ad.sentiment is none or not ad.topics %}
                        <a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-secondary btn-sm">Analyze</a>
                        {% endif %}
                    </div>
                </div>
                <div class="card-footer text-muted">
                    {{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') }}
                </div>
            </div>
        </div>
        {% endfor %}
    </div>
    {% else %}
    <div class="alert alert-info" role="alert">
        No ads found for this advertiser. Try <a href="{{ url_for('facebook_ads.page_search', page_name=advertiser) }}">scraping more ads</a>.
    </div>
    {% endif %}
</div>
{% endblock %}
app/templates/facebook_ads/advertisers.html ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Facebook Advertisers{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container mt-4">
7
+ <h1 class="mb-4">Facebook Advertisers</h1>
8
+
9
+ {% if advertisers %}
10
+ <div class="card">
11
+ <div class="card-body">
12
+ <div class="table-responsive">
13
+ <table class="table table-striped table-hover">
14
+ <thead>
15
+ <tr>
16
+ <th>Advertiser</th>
17
+ <th>Number of Ads</th>
18
+ <th>Actions</th>
19
+ </tr>
20
+ </thead>
21
+ <tbody>
22
+ {% for advertiser in advertisers %}
23
+ <tr>
24
+ <td>{{ advertiser[0] }}</td>
25
+ <td>{{ advertiser[1] }}</td>
26
+ <td>
27
+ <a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=advertiser[0]) }}"
28
+ class="btn btn-primary btn-sm">View Ads</a>
29
+ </td>
30
+ </tr>
31
+ {% endfor %}
32
+ </tbody>
33
+ </table>
34
+ </div>
35
+ </div>
36
+ </div>
37
+ {% else %}
38
+ <div class="alert alert-info" role="alert">
39
+ No advertisers found. Try <a href="{{ url_for('facebook_ads.search') }}">searching for ads</a> first.
40
+ </div>
41
+ {% endif %}
42
+ </div>
43
+ {% endblock %}
app/templates/facebook_ads/index.html ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{% block title %}Facebook Ads Dashboard{% endblock %}

{% block content %}
{#
  Dashboard landing page: a 2x2 grid of navigation cards.
  Each entry is (card title, description, endpoint, button label).
#}
{% set dashboard_cards = [
    ('Search Ads', 'Search for Facebook ads using keywords and filters.', 'facebook_ads.search', 'Search Ads'),
    ('Page Search', 'Search for ads from specific Facebook pages.', 'facebook_ads.page_search', 'Search by Page'),
    ('View Results', 'Browse and analyze collected Facebook ads.', 'facebook_ads.results', 'View Results'),
    ('Advertisers', 'View and analyze advertisers and their ads.', 'facebook_ads.advertisers', 'View Advertisers'),
] %}
<div class="container mt-4">
    <h1 class="mb-4">Facebook Ads Dashboard</h1>

    {# batch(2) groups the cards into rows of two #}
    {% for card_row in dashboard_cards|batch(2) %}
    <div class="row">
        {% for heading, blurb, endpoint, button_label in card_row %}
        <div class="col-md-6">
            <div class="card mb-4">
                <div class="card-body">
                    <h5 class="card-title">{{ heading }}</h5>
                    <p class="card-text">{{ blurb }}</p>
                    <a href="{{ url_for(endpoint) }}" class="btn btn-primary">{{ button_label }}</a>
                </div>
            </div>
        </div>
        {% endfor %}
    </div>
    {% endfor %}
</div>
{% endblock %}
app/templates/facebook_ads/page_search.html ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{% block title %}Search Facebook Page Ads{% endblock %}

{% block content %}
{#
  Form that starts a scrape of one Facebook page's ads.
  Context: `form` (supplies the CSRF token) and an optional `task_id`,
  which, when set, shows the "in progress" notice.
#}
<div class="container mt-4">
    <h1 class="mb-4">Search Facebook Page Ads</h1>

    <div class="card mb-4">
        <div class="card-body">
            <form method="POST" action="{{ url_for('facebook_ads.page_search') }}">
                {{ form.csrf_token }}
                <div class="mb-3">
                    <label for="page_name" class="form-label">Facebook Page Name</label>
                    {# Pre-fill from the `page_name` query parameter so links of the form
                       ...?page_name=<name> arrive with the field already populated. #}
                    <input type="text" class="form-control" id="page_name" name="page_name" required
                           value="{{ request.args.get('page_name', '') }}"
                           placeholder="Enter the Facebook page name or URL">
                    <small class="text-muted">Example: cocacola or https://www.facebook.com/cocacola</small>
                </div>

                <div class="mb-3">
                    <label for="num_scrolls" class="form-label">Number of Scrolls</label>
                    <input type="number" class="form-control" id="num_scrolls" name="num_scrolls"
                           value="5" min="1" max="50">
                    <small class="text-muted">More scrolls = more ads, but takes longer to scrape</small>
                </div>

                <button type="submit" class="btn btn-primary">Search Page Ads</button>
                <a href="{{ url_for('facebook_ads.index') }}" class="btn btn-secondary">Back to Dashboard</a>
            </form>
        </div>
    </div>

    {% if task_id %}
    <div class="alert alert-info" role="alert">
        <h4 class="alert-heading">Scraping in Progress!</h4>
        <p>Your page search request is being processed. This may take a few minutes depending on the number of scrolls.</p>
        <hr>
        <p class="mb-0">You can view the results on the <a href="{{ url_for('facebook_ads.results') }}">Results page</a> once the scraping is complete.</p>
    </div>
    {% endif %}

    {# Flashed messages; the flash category doubles as the Bootstrap alert variant #}
    {% with messages = get_flashed_messages(with_categories=true) %}
        {% if messages %}
            {% for category, message in messages %}
            <div class="alert alert-{{ category }}" role="alert">
                {{ message }}
            </div>
            {% endfor %}
        {% endif %}
    {% endwith %}
</div>
{% endblock %}
app/templates/facebook_ads/results.html ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{% block title %}Facebook Ads Results{% endblock %}

{% block content %}
{#
  Filterable grid of scraped ads.
  Context: `ads` (iterable of ad records), plus the current `query` and
  `advertiser` filter values echoed back into the filter form.
#}
<div class="container mt-4">
    <h1 class="mb-4">Facebook Ads Results</h1>

    <div class="card mb-4">
        <div class="card-body">
            <form method="GET" action="{{ url_for('facebook_ads.results') }}" class="row g-3">
                <div class="col-md-4">
                    <label for="query" class="form-label">Search Query</label>
                    {# `or ''` keeps an unset filter from being rendered as the text "None" #}
                    <input type="text" class="form-control" id="query" name="query" value="{{ query or '' }}">
                </div>
                <div class="col-md-4">
                    <label for="advertiser" class="form-label">Advertiser</label>
                    <input type="text" class="form-control" id="advertiser" name="advertiser" value="{{ advertiser or '' }}">
                </div>
                <div class="col-md-4">
                    <label class="form-label">&nbsp;</label>
                    <div>
                        <button type="submit" class="btn btn-primary">Filter</button>
                        <a href="{{ url_for('facebook_ads.results') }}" class="btn btn-secondary">Clear Filters</a>
                    </div>
                </div>
            </form>
        </div>
    </div>

    {% if ads %}
    <div class="row">
        {% for ad in ads %}
        {# content is a nullable column; normalise once so slicing and |length are safe #}
        {% set ad_text = ad.content or '' %}
        <div class="col-md-6 mb-4">
            <div class="card h-100">
                <div class="card-body">
                    <h5 class="card-title">
                        {# advertiser is nullable too: only link when there is a name to link to #}
                        {% if ad.advertiser %}
                        <a href="{{ url_for('facebook_ads.advertiser_detail', advertiser_name=ad.advertiser) }}">
                            {{ ad.advertiser }}
                        </a>
                        {% else %}
                        Unknown advertiser
                        {% endif %}
                    </h5>

                    {% if ad.image_urls %}
                    <div class="mb-3">
                        <img src="{{ ad.image_urls[0] }}" class="img-fluid rounded" alt="Ad Image">
                    </div>
                    {% endif %}

                    <p class="card-text">{{ ad_text[:200] }}{% if ad_text|length > 200 %}...{% endif %}</p>

                    {# `is number` instead of a truthiness test: a score of exactly 0 is falsy
                       and would otherwise never reach the "Neutral" badge. It also skips
                       values that cannot be compared with 0.
                       NOTE(review): the migration declares `sentiment` as a JSON column -
                       confirm the value reaching this template is a plain number. #}
                    {% if ad.sentiment is number %}
                    <div class="mb-2">
                        <strong>Sentiment:</strong>
                        <span class="badge bg-{{ 'success' if ad.sentiment > 0 else 'danger' if ad.sentiment < 0 else 'secondary' }}">
                            {{ "Positive" if ad.sentiment > 0 else "Negative" if ad.sentiment < 0 else "Neutral" }}
                        </span>
                    </div>
                    {% endif %}

                    {% if ad.topics %}
                    <div class="mb-2">
                        <strong>Topics:</strong>
                        {% for topic in ad.topics %}
                        <span class="badge bg-info me-1">{{ topic }}</span>
                        {% endfor %}
                    </div>
                    {% endif %}

                    <div class="mt-3">
                        <a href="{{ url_for('facebook_ads.view_ad', ad_id=ad.id) }}" class="btn btn-primary btn-sm">View Details</a>
                        <a href="{{ url_for('facebook_ads.analyze_ad', ad_id=ad.id) }}" class="btn btn-secondary btn-sm">Analyze</a>
                    </div>
                </div>
                <div class="card-footer text-muted">
                    {# created_at is nullable; avoid calling strftime on a missing timestamp #}
                    Scraped: {{ ad.created_at.strftime('%Y-%m-%d %H:%M:%S') if ad.created_at else 'unknown' }}
                </div>
            </div>
        </div>
        {% endfor %}
    </div>
    {% else %}
    <div class="alert alert-info" role="alert">
        No ads found. Try adjusting your search filters or <a href="{{ url_for('facebook_ads.search') }}">search for new ads</a>.
    </div>
    {% endif %}
</div>
{% endblock %}
app/templates/facebook_ads/search.html ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends "base.html" %}

{% block title %}Search Facebook Ads{% endblock %}

{% block content %}
{#
  Keyword search form that starts an ad scrape.
  Context: `form` (supplies the CSRF token) and an optional `task_id`,
  which, when set, shows the "in progress" notice.
#}
{# (option value, visible label) pairs for the country selector #}
{% set country_choices = [
    ('ALL', 'All Countries'),
    ('US', 'United States'),
    ('GB', 'United Kingdom'),
    ('CA', 'Canada'),
    ('AU', 'Australia'),
] %}
<div class="container mt-4">
    <h1 class="mb-4">Search Facebook Ads</h1>

    <div class="card mb-4">
        <div class="card-body">
            <form method="POST" action="{{ url_for('facebook_ads.search') }}">
                {{ form.csrf_token }}
                <div class="mb-3">
                    <label for="search_query" class="form-label">Search Query</label>
                    <input type="text" class="form-control" id="search_query" name="search_query" required
                           placeholder="Enter keywords to search for ads">
                </div>

                <div class="mb-3">
                    <label for="num_scrolls" class="form-label">Number of Scrolls</label>
                    <input type="number" class="form-control" id="num_scrolls" name="num_scrolls"
                           value="5" min="1" max="50">
                    <small class="text-muted">More scrolls = more ads, but takes longer to scrape</small>
                </div>

                <div class="mb-3">
                    <label for="country_code" class="form-label">Country</label>
                    <select class="form-control" id="country_code" name="country_code">
                        {% for code, country_label in country_choices %}
                        <option value="{{ code }}">{{ country_label }}</option>
                        {% endfor %}
                        <!-- Add more countries as needed -->
                    </select>
                </div>

                <button type="submit" class="btn btn-primary">Search Ads</button>
                <a href="{{ url_for('facebook_ads.index') }}" class="btn btn-secondary">Back to Dashboard</a>
            </form>
        </div>
    </div>

    {% if task_id %}
    <div class="alert alert-info" role="alert">
        <h4 class="alert-heading">Scraping in Progress!</h4>
        <p>Your search request is being processed. This may take a few minutes depending on the number of scrolls.</p>
        <hr>
        <p class="mb-0">You can view the results on the <a href="{{ url_for('facebook_ads.results') }}">Results page</a> once the scraping is complete.</p>
    </div>
    {% endif %}

    {# Flashed messages; the flash category doubles as the Bootstrap alert variant #}
    {% with messages = get_flashed_messages(with_categories=true) %}
        {% if messages %}
            {% for category, message in messages %}
            <div class="alert alert-{{ category }}" role="alert">
                {{ message }}
            </div>
            {% endfor %}
        {% endif %}
    {% endwith %}
</div>
{% endblock %}
app/templates/login.html CHANGED
@@ -1,15 +1,28 @@
1
  {% extends "base.html" %}
2
 
3
  {% block content %}
4
- <h2>Login</h2>
5
- <form method="POST" action="{{ url_for('auth.login') }}">
6
- <label for="email">Email:</label>
7
- <input type="email" id="email" name="email" required>
8
-
9
- <label for="password">Password:</label>
10
- <input type="password" id="password" name="password" required>
11
-
12
- <button type="submit">Login</button>
13
- </form>
14
- <p>Don't have an account? <a href="{{ url_for('auth.register') }}">Register here</a>.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  {% endblock %}
 
1
{% extends "base.html" %}

{% block title %}Login{% endblock %}

{% block content %}
{#
  Login form rendered from the `form` object (fields: email, password).
  Field wrappers use the same mb-3 / form-label classes as the other
  templates in this app, and per-field validation errors are shown inline.
#}
<div class="container mt-5">
    <div class="row justify-content-center">
        <div class="col-md-6">
            <div class="card">
                <div class="card-header">Login</div>
                <div class="card-body">
                    <form method="POST" action="{{ url_for('auth.login') }}">
                        {{ form.hidden_tag() }}
                        {% for field in (form.email, form.password) %}
                        <div class="mb-3">
                            {{ field.label(class="form-label") }}
                            {# is-invalid makes Bootstrap reveal the invalid-feedback blocks below #}
                            {{ field(class="form-control" ~ (" is-invalid" if field.errors else "")) }}
                            {% for error in field.errors %}
                            <div class="invalid-feedback">{{ error }}</div>
                            {% endfor %}
                        </div>
                        {% endfor %}
                        <button type="submit" class="btn btn-primary">Login</button>
                    </form>
                    <div class="mt-3">
                        <p>Don't have an account? <a href="{{ url_for('auth.register') }}">Register here</a>.</p>
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>
{% endblock %}
celery.db ADDED
Binary file (32.8 kB). View file
 
config.py CHANGED
@@ -7,10 +7,13 @@ class Config:
7
  if SECRET_KEY == 'dev-secret-key-change-in-production':
8
  print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")
9
 
10
- SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:////tmp/app.db')
 
11
  SQLALCHEMY_TRACK_MODIFICATIONS = False
12
- CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
13
- CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
 
 
14
 
15
  # Use a mock API key for development if not provided
16
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
 
7
  if SECRET_KEY == 'dev-secret-key-change-in-production':
8
  print("WARNING: Using default SECRET_KEY. This is insecure and should be changed in production.")
9
 
10
+ # Use SQLite for simplicity
11
+ SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:///app.db')
12
  SQLALCHEMY_TRACK_MODIFICATIONS = False
13
+
14
+ # Use SQLite for Celery broker instead of Redis
15
+ CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'sqla+sqlite:///celery.db')
16
+ CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'db+sqlite:///celery-results.db')
17
 
18
  # Use a mock API key for development if not provided
19
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-mock-key-for-development')
migrations/README ADDED
@@ -0,0 +1 @@
 
 
1
+ Single-database configuration for Flask.
migrations/__pycache__/env.cpython-312.pyc ADDED
Binary file (4.5 kB). View file
 
migrations/alembic.ini ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # template used to generate migration files
5
+ # file_template = %%(rev)s_%%(slug)s
6
+
7
+ # set to 'true' to run the environment during
8
+ # the 'revision' command, regardless of autogenerate
9
+ # revision_environment = false
10
+
11
+
12
+ # Logging configuration
13
+ [loggers]
14
+ keys = root,sqlalchemy,alembic,flask_migrate
15
+
16
+ [handlers]
17
+ keys = console
18
+
19
+ [formatters]
20
+ keys = generic
21
+
22
+ [logger_root]
23
+ level = WARN
24
+ handlers = console
25
+ qualname =
26
+
27
+ [logger_sqlalchemy]
28
+ level = WARN
29
+ handlers =
30
+ qualname = sqlalchemy.engine
31
+
32
+ [logger_alembic]
33
+ level = INFO
34
+ handlers =
35
+ qualname = alembic
36
+
37
+ [logger_flask_migrate]
38
+ level = INFO
39
+ handlers =
40
+ qualname = flask_migrate
41
+
42
+ [handler_console]
43
+ class = StreamHandler
44
+ args = (sys.stderr,)
45
+ level = NOTSET
46
+ formatter = generic
47
+
48
+ [formatter_generic]
49
+ format = %(levelname)-5.5s [%(name)s] %(message)s
50
+ datefmt = %H:%M:%S
migrations/env.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from logging.config import fileConfig
3
+
4
+ from flask import current_app
5
+
6
+ from alembic import context
7
+
8
+ # this is the Alembic Config object, which provides
9
+ # access to the values within the .ini file in use.
10
+ config = context.config
11
+
12
+ # Interpret the config file for Python logging.
13
+ # This line sets up loggers basically.
14
+ fileConfig(config.config_file_name)
15
+ logger = logging.getLogger('alembic.env')
16
+
17
+
18
def get_engine():
    """Return the engine of the database object registered with Flask-Migrate.

    The ``get_engine()`` call is tried first; if it raises ``TypeError`` or
    ``AttributeError`` the ``engine`` attribute is used instead.
    """
    migrate_ext = current_app.extensions['migrate']
    try:
        # Flask-SQLAlchemy<3 and Alchemical expose a get_engine() method.
        engine = migrate_ext.db.get_engine()
    except (TypeError, AttributeError):
        # Flask-SQLAlchemy>=3 exposes the engine as an attribute.
        engine = migrate_ext.db.engine
    return engine
25
+
26
+
27
def get_engine_url():
    """Return the engine URL as a string with every ``%`` doubled to ``%%``.

    ``render_as_string(hide_password=False)`` is preferred; when the URL
    object lacks it (``AttributeError``) plain ``str()`` is used instead.
    """
    # NOTE(review): the doubling is presumably needed because the result is
    # stored with config.set_main_option, where a lone '%' would be read as
    # interpolation syntax - confirm against the Alembic docs.
    single, doubled = '%', '%%'
    try:
        url_text = get_engine().url.render_as_string(
            hide_password=False).replace(single, doubled)
    except AttributeError:
        url_text = str(get_engine().url).replace(single, doubled)
    return url_text
33
+
34
+
35
+ # add your model's MetaData object here
36
+ # for 'autogenerate' support
37
+ # from myapp import mymodel
38
+ # target_metadata = mymodel.Base.metadata
39
+ config.set_main_option('sqlalchemy.url', get_engine_url())
40
+ target_db = current_app.extensions['migrate'].db
41
+
42
+ # other values from the config, defined by the needs of env.py,
43
+ # can be acquired:
44
+ # my_important_option = config.get_main_option("my_important_option")
45
+ # ... etc.
46
+
47
+
48
def get_metadata():
    """Return the MetaData object of ``target_db`` used for autogenerate.

    When ``target_db`` has a ``metadatas`` mapping, the entry stored under
    the ``None`` key is returned; otherwise its ``metadata`` attribute is.
    """
    if not hasattr(target_db, 'metadatas'):
        return target_db.metadata
    return target_db.metadatas[None]
52
+
53
+
54
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The Alembic context is configured from the ``sqlalchemy.url`` option
    only, without creating an Engine, so no DBAPI has to be available.
    Calls to context.execute() in this mode emit the given string to the
    script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": get_metadata(),
        "literal_binds": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
73
+
74
+
75
def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is obtained via get_engine(), a connection is opened on it and
    associated with the Alembic context, and the migrations run inside a
    transaction on that connection.
    """

    def drop_empty_autogenerate(migration_context, revision, directives):
        # Prevents an auto-migration from being generated when there are no
        # changes to the schema.
        # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
        if not getattr(config.cmd_opts, 'autogenerate', False):
            return
        generated_script = directives[0]
        if not generated_script.upgrade_ops.is_empty():
            return
        directives[:] = []
        logger.info('No changes in schema detected.')

    # Install the callback only when the application has not supplied its own.
    configure_kwargs = current_app.extensions['migrate'].configure_args
    if configure_kwargs.get("process_revision_directives") is None:
        configure_kwargs["process_revision_directives"] = drop_empty_autogenerate

    engine = get_engine()

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            **configure_kwargs
        )

        with context.begin_transaction():
            context.run_migrations()
108
+
109
+
110
# Run the migrations with the runner that matches the mode reported by Alembic.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
migrations/script.py.mako ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ ${imports if imports else ""}
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = ${repr(up_revision)}
14
+ down_revision = ${repr(down_revision)}
15
+ branch_labels = ${repr(branch_labels)}
16
+ depends_on = ${repr(depends_on)}
17
+
18
+
19
+ def upgrade():
20
+ ${upgrades if upgrades else "pass"}
21
+
22
+
23
+ def downgrade():
24
+ ${downgrades if downgrades else "pass"}
migrations/versions/__pycache__/dddcd665398d_add_facebook_ad_table.cpython-312.pyc ADDED
Binary file (4.34 kB). View file
 
migrations/versions/dddcd665398d_add_facebook_ad_table.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Add facebook_ad table
2
+
3
+ Revision ID: dddcd665398d
4
+ Revises:
5
+ Create Date: 2025-03-10 09:02:26.975759
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = 'dddcd665398d'
14
+ down_revision = None
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
def upgrade():
    """Create the ``facebook_ad`` table and its four non-unique indexes."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = 'facebook_ad'
    op.create_table(
        table_name,
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('ad_id', sa.String(length=255), nullable=True),
        sa.Column('advertiser', sa.String(length=255), nullable=True),
        sa.Column('advertiser_id', sa.String(length=255), nullable=True),
        sa.Column('content', sa.Text(), nullable=True),
        sa.Column('images', sa.JSON(), nullable=True),
        sa.Column('links', sa.JSON(), nullable=True),
        sa.Column('search_query', sa.String(length=255), nullable=True),
        sa.Column('position', sa.Integer(), nullable=True),
        sa.Column('sentiment', sa.JSON(), nullable=True),
        sa.Column('topics', sa.JSON(), nullable=True),
        sa.Column('entities', sa.JSON(), nullable=True),
        sa.Column('raw_data', sa.JSON(), nullable=True),
        sa.Column('raw_text', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.Column('user_id', sa.Integer(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['user.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )

    # (index name, indexed column), created in this order.
    indexes = (
        ('ix_facebook_ad_ad_id', 'ad_id'),
        ('ix_facebook_ad_advertiser', 'advertiser'),
        ('ix_facebook_ad_advertiser_id', 'advertiser_id'),
        ('ix_facebook_ad_search_query', 'search_query'),
    )
    with op.batch_alter_table(table_name, schema=None) as batch_op:
        for index_name, column_name in indexes:
            batch_op.create_index(batch_op.f(index_name), [column_name], unique=False)

    # ### end Alembic commands ###
49
+
50
+
51
def downgrade():
    """Drop the ``facebook_ad`` indexes, then the table itself."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Index names listed in the order they are dropped.
    index_names = (
        'ix_facebook_ad_search_query',
        'ix_facebook_ad_advertiser_id',
        'ix_facebook_ad_advertiser',
        'ix_facebook_ad_ad_id',
    )
    with op.batch_alter_table('facebook_ad', schema=None) as batch_op:
        for index_name in index_names:
            batch_op.drop_index(batch_op.f(index_name))

    op.drop_table('facebook_ad')
    # ### end Alembic commands ###
requirements.txt CHANGED
@@ -1,22 +1,21 @@
1
- Flask==2.3.2
2
- Flask-SQLAlchemy==3.0.5
3
- Flask-Login==0.6.2
4
- Flask-WTF==1.1.1
5
- Flask-Migrate==4.0.4
6
- Werkzeug==2.3.7
7
- celery==5.3.1
8
- redis==5.0.0
9
- selenium==4.10.0
10
- transformers==4.31.0
11
- numpy<2.0.0
12
- opencv-python==4.8.0.76
13
- pytesseract==0.3.10
14
- gunicorn==20.1.0
15
- pytest==7.4.0
16
- prophet==1.1.4
17
- webdriver-manager==4.0.0
18
- psycopg2-binary==2.9.6
19
- click==8.1.3
20
- python-dotenv==1.0.0
21
- ratelimit==2.2.1
22
- torch==2.0.1
 
1
+ Flask==3.0.0
2
+ Flask-SQLAlchemy==3.1.1
3
+ Flask-Login==0.6.3
4
+ Flask-Migrate==4.0.5
5
+ Flask-WTF==1.2.1
6
+ email_validator==2.1.0.post1
7
+ celery==5.3.6
8
+ redis==5.0.1
9
+ alembic==1.13.1
10
+ python-dotenv==1.0.1
11
+ gunicorn==21.2.0
12
+ psycopg2-binary==2.9.9
13
+ requests==2.31.0
14
+ beautifulsoup4==4.12.3
15
+ selenium==4.17.2
16
+ transformers==4.37.2
17
+ torch==2.1.2
18
+ textblob==0.17.1
19
+ spacy==3.7.2
20
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
21
+ webdriver-manager==4.0.1