Waseem7711 committed on
Commit
932c5b4
·
verified ·
1 Parent(s): 2b46621

Create seo_analyzer.py

Browse files
Files changed (1) hide show
  1. seo_analyzer.py +265 -0
seo_analyzer.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import json
4
+ import re
5
+ from urllib.parse import urljoin, urlparse
6
+
7
class SEOAnalyzer:
    """Fetch a web page and audit its head metadata against common SEO practices.

    The public entry point is :meth:`analyze_website`; the remaining public
    methods operate on already-parsed data and can be used on their own.
    """

    def __init__(self):
        """Create a reusable HTTP session that sends a desktop-browser User-Agent."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

    def analyze_website(self, url):
        """Download ``url`` and run the full analysis.

        Args:
            url: Address of the page to analyse.

        Returns:
            On success, a dict with ``success`` (True), ``metadata``,
            ``structured_data``, ``seo_score``, ``recommendations`` and
            ``html_head``. On failure, a dict with ``success`` (False) and a
            human-readable ``error`` message. No exception is propagated.
        """
        try:
            response = self.session.get(url, timeout=10, allow_redirects=True)
            response.raise_for_status()

            # Parse the raw bytes so that BeautifulSoup can honour a charset
            # declared inside the document. ``response.text`` silently falls
            # back to ISO-8859-1 when the server sends no charset, which
            # garbles UTF-8 pages. The HTTP charset is passed as a hint only
            # when the server actually declared one.
            content_type = response.headers.get('content-type', '')
            declared_encoding = (
                response.encoding if 'charset' in content_type.lower() else None
            )
            soup = BeautifulSoup(
                response.content, 'html.parser', from_encoding=declared_encoding
            )

            metadata = self.extract_metadata(soup)
            structured_data = self.extract_structured_data(soup)
            seo_score = self.calculate_seo_score(metadata)
            recommendations = self.generate_recommendations(metadata, url)

            return {
                'success': True,
                'metadata': metadata,
                'structured_data': structured_data,
                'seo_score': seo_score,
                'recommendations': recommendations,
                'html_head': str(soup.head) if soup.head else "Head section not found"
            }

        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': f"Failed to fetch website: {str(e)}"
            }
        except Exception as e:
            # Boundary handler: callers expect an error dict, never a raise.
            return {
                'success': False,
                'error': f"Analysis error: {str(e)}"
            }

    def extract_metadata(self, soup):
        """Collect title, meta and selected link information from a parsed page.

        Args:
            soup: Parsed document exposing ``title`` and ``find_all(name)``,
                whose tags expose ``get(attr, default)``.

        Returns:
            A dict keyed by lower-cased meta ``name`` / ``property`` values,
            plus ``title``, ``charset``, ``http-equiv-<name>`` and
            ``link-<rel>`` entries where present. When a key occurs more than
            once, the last occurrence wins.
        """
        metadata = {}

        if soup.title:
            title_text = soup.title.string
            metadata['title'] = title_text.strip() if title_text else ""

        for tag in soup.find_all('meta'):
            content = tag.get('content', '')
            name = tag.get('name')
            prop = tag.get('property')

            # A single tag may carry both ``name`` and ``property``
            # (e.g. name="description" property="og:description");
            # record it under both keys instead of dropping the property.
            if name:
                metadata[name.lower()] = content
            if prop:
                metadata[prop.lower()] = content
            if name or prop:
                continue

            equiv = tag.get('http-equiv')
            if equiv:
                equiv = equiv.lower()
                metadata[f'http-equiv-{equiv}'] = content
                if equiv == 'content-type':
                    # Legacy charset declaration:
                    # <meta http-equiv="Content-Type" content="...; charset=X">
                    declared = re.search(
                        r'charset\s*=\s*["\']?([\w.:-]+)', content, re.IGNORECASE
                    )
                    if declared:
                        # An explicit <meta charset> still takes precedence.
                        metadata.setdefault('charset', declared.group(1))
            elif tag.get('charset'):
                metadata['charset'] = tag.get('charset')

        tracked_rels = ('canonical', 'alternate', 'prev', 'next')
        for tag in soup.find_all('link'):
            rel = tag.get('rel')
            if not rel:
                continue
            rel_str = ' '.join(rel) if isinstance(rel, list) else rel
            # Attribute values are case-insensitive in HTML.
            rel_str = rel_str.strip().lower()
            if rel_str in tracked_rels:
                metadata[f'link-{rel_str}'] = tag.get('href', '')

        return metadata

    def extract_structured_data(self, soup):
        """Return every JSON-LD payload found in the page.

        Args:
            soup: Parsed document exposing ``find_all``.

        Returns:
            A list with one decoded JSON value per
            ``<script type="application/ld+json">`` element. Empty or
            malformed scripts are skipped.
        """
        structured_data = []

        for script in soup.find_all('script', type='application/ld+json'):
            raw = script.string
            if not raw:
                continue
            try:
                structured_data.append(json.loads(raw.strip()))
            except json.JSONDecodeError:
                # Best effort: one broken block must not hide the valid ones.
                continue

        return structured_data

    @staticmethod
    def _blocks_indexing(robots):
        """Return True when a robots meta value forbids indexing.

        ``none`` is shorthand for ``noindex, nofollow``. It is matched as a
        whole directive so that values such as ``max-image-preview:none`` are
        not misread.
        """
        lowered = (robots or '').lower()
        if 'noindex' in lowered:
            return True
        directives = {part.strip() for part in lowered.split(',')}
        return 'none' in directives

    def calculate_seo_score(self, metadata):
        """Score the metadata from 0 to 100 against basic SEO practices.

        Points: title 20, meta description 20, Open Graph 20, Twitter Card 15,
        technical tags 25.

        Args:
            metadata: Dict as produced by :meth:`extract_metadata`.

        Returns:
            Integer score, capped at 100.
        """
        score = 0
        max_score = 100

        # Title tag (20 points): 10 for presence, 10 for recommended length.
        title = metadata.get('title', '')
        if title:
            score += 10
            if 30 <= len(title) <= 60:
                score += 10

        # Meta description (20 points): 10 for presence, 10 for length.
        description = metadata.get('description', '')
        if description:
            score += 10
            if 120 <= len(description) <= 160:
                score += 10

        # Open Graph tags (20 points).
        if metadata.get('og:title', ''):
            score += 7
        if metadata.get('og:description', ''):
            score += 7
        if metadata.get('og:image', ''):
            score += 6

        # Twitter Card (15 points).
        for key in ('twitter:card', 'twitter:title', 'twitter:description'):
            if metadata.get(key, ''):
                score += 5

        # Technical SEO (25 points).
        for key in ('viewport', 'charset', 'robots', 'link-canonical'):
            if metadata.get(key):
                score += 5
        if not SEOAnalyzer._blocks_indexing(metadata.get('robots', '')):
            score += 5

        return min(score, max_score)

    def generate_recommendations(self, metadata, url):
        """Build a list of actionable SEO recommendations.

        Args:
            metadata: Dict as produced by :meth:`extract_metadata`.
            url: Address of the analysed page (currently unused; kept for
                interface compatibility).

        Returns:
            A list of dicts, each with ``type`` (``error``, ``warning`` or
            ``info``) and ``message``.
        """
        recommendations = []

        def add(level, message):
            recommendations.append({'type': level, 'message': message})

        # Title tag.
        title = metadata.get('title', '')
        if not title:
            add('error', 'Missing title tag. Add a descriptive title between 30-60 characters.')
        elif len(title) < 30:
            add('warning', f'Title tag is too short ({len(title)} chars). Aim for 30-60 characters.')
        elif len(title) > 60:
            add('warning', f'Title tag is too long ({len(title)} chars). Keep it under 60 characters to avoid truncation.')

        # Meta description.
        description = metadata.get('description', '')
        if not description:
            add('error', 'Missing meta description. Add a compelling description between 120-160 characters.')
        elif len(description) < 120:
            add('warning', f'Meta description is too short ({len(description)} chars). Aim for 120-160 characters.')
        elif len(description) > 160:
            add('warning', f'Meta description is too long ({len(description)} chars). Keep it under 160 characters.')

        # Open Graph.
        if not metadata.get('og:title'):
            add('warning', 'Missing Open Graph title. Add og:title for better social media sharing.')
        if not metadata.get('og:description'):
            add('warning', 'Missing Open Graph description. Add og:description for social media previews.')
        if not metadata.get('og:image'):
            add('warning', 'Missing Open Graph image. Add og:image (1200x630px recommended) for social sharing.')

        # Twitter Card.
        if not metadata.get('twitter:card'):
            add('info', 'Consider adding Twitter Card meta tags for better Twitter sharing experience.')

        # Technical SEO.
        if not metadata.get('viewport'):
            add('error', 'Missing viewport meta tag. Add <meta name="viewport" content="width=device-width, initial-scale=1"> for mobile optimization.')
        if not metadata.get('charset'):
            add('warning', 'Missing charset declaration. Add <meta charset="UTF-8"> in the head section.')
        if not metadata.get('link-canonical'):
            add('info', 'Consider adding a canonical URL to prevent duplicate content issues.')

        # Robots meta tag ("none" is equivalent to "noindex, nofollow").
        if SEOAnalyzer._blocks_indexing(metadata.get('robots', '')):
            add('warning', 'Page is set to noindex. Remove this if you want the page to be indexed by search engines.')

        return recommendations