vijaykumaredstellar committed on
Commit
98f6c84
·
verified ·
1 Parent(s): cb5e2da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -19
app.py CHANGED
@@ -6,8 +6,6 @@ import pickle
6
  from huggingface_hub import hf_hub_download
7
  from sklearn.metrics.pairwise import cosine_similarity
8
  import httpx
9
- from bs4 import BeautifulSoup
10
- import re
11
 
12
  # ============================================
13
  # CONFIGURATION
@@ -42,7 +40,8 @@ class KnowledgeBase:
42
  self.embeddings = data['embeddings']
43
  self.loaded = True
44
 
45
- return True, f"βœ… Loaded {len(self.knowledge_base)} searchable paragraphs from {len(set(p['url'] for p in self.knowledge_base))} blog posts"
 
46
 
47
  except Exception as e:
48
  return False, f"❌ Error: {str(e)}"
@@ -123,7 +122,6 @@ class OrphanPageAnalyzer:
123
  def analyze(self, orphan_url, num_sources=3):
124
  """
125
  Complete analysis: Find sources, placements, and generate report
126
- Returns: markdown report with implementation details
127
  """
128
 
129
  # Get orphan page metadata
@@ -249,23 +247,174 @@ Provide ONLY the modified sentence with the anchor text naturally integrated."""
249
  def generate_report(self, orphan_url, orphan_title, results):
250
  """Generate markdown report"""
251
 
252
- report = f"""# πŸ”— Internal Linking Report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- **Orphan Page:** {orphan_title}
255
- **Target URL:** `{orphan_url}`
256
- **Links Found:** {len(results)}
 
 
257
 
258
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
- """
261
-
262
- for i, result in enumerate(results, 1):
263
- report += f"""
264
- ## Link {i}: {result['source_title']}
 
 
 
 
 
 
 
 
 
265
 
266
- **Source URL:** `{result['source_url']}`
267
- **Paragraph #:** {result['paragraph_index']}
268
- **Relevance Score:** {result['score']}/100
269
- **Anchor Text:** "{result['anchor_text']}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- ### Current Sentence:
 
 
 
6
  from huggingface_hub import hf_hub_download
7
  from sklearn.metrics.pairwise import cosine_similarity
8
  import httpx
 
 
9
 
10
  # ============================================
11
  # CONFIGURATION
 
40
  self.embeddings = data['embeddings']
41
  self.loaded = True
42
 
43
+ num_posts = len(set(p['url'] for p in self.knowledge_base))
44
+ return True, f"βœ… Loaded {len(self.knowledge_base)} paragraphs from {num_posts} blog posts"
45
 
46
  except Exception as e:
47
  return False, f"❌ Error: {str(e)}"
 
122
  def analyze(self, orphan_url, num_sources=3):
123
  """
124
  Complete analysis: Find sources, placements, and generate report
 
125
  """
126
 
127
  # Get orphan page metadata
 
247
  def generate_report(self, orphan_url, orphan_title, results):
248
  """Generate markdown report"""
249
 
250
+ report = f"# πŸ”— Internal Linking Report\n\n"
251
+ report += f"**Orphan Page:** {orphan_title}\n"
252
+ report += f"**Target URL:** `{orphan_url}`\n"
253
+ report += f"**Links Found:** {len(results)}\n\n"
254
+ report += "---\n\n"
255
+
256
+ for i, result in enumerate(results, 1):
257
+ report += f"## Link {i}: {result['source_title']}\n\n"
258
+ report += f"**Source URL:** `{result['source_url']}`\n"
259
+ report += f"**Paragraph #:** {result['paragraph_index']}\n"
260
+ report += f"**Relevance Score:** {result['score']}/100\n"
261
+ report += f"**Anchor Text:** \"{result['anchor_text']}\"\n\n"
262
+
263
+ report += "### Current Sentence:\n"
264
+ report += "```\n"
265
+ report += result['current_sentence'] + "\n"
266
+ report += "```\n\n"
267
+
268
+ report += "### New Sentence (with link):\n"
269
+ report += "```\n"
270
+ report += result['new_sentence'] + "\n"
271
+ report += "```\n\n"
272
+
273
+ report += "### HTML Code:\n"
274
+ report += "```html\n"
275
+ html_code = result['new_sentence'].replace(
276
+ result['anchor_text'],
277
+ f'<a href="{result["target_url"]}">{result["anchor_text"]}</a>'
278
+ )
279
+ report += html_code + "\n"
280
+ report += "```\n\n"
281
+ report += "---\n\n"
282
+
283
+ return report
284
 
285
+ # ============================================
286
+ # GLOBAL STATE
287
+ # ============================================
288
# Single knowledge base instance shared by the Gradio callbacks.
kb = KnowledgeBase()
# Stays None until setup() succeeds and assigns an OrphanPageAnalyzer.
analyzer = None
290
 
291
+ # ============================================
292
+ # GRADIO FUNCTIONS
293
+ # ============================================
294
+ def setup(api_key, hf_token):
295
+ """Setup API and load knowledge base"""
296
+ global analyzer
297
+
298
+ status = []
299
+
300
+ # Setup API
301
+ if not api_key or not api_key.strip():
302
+ return "❌ Please enter your OpenRouter API key", None
303
+
304
+ try:
305
+ client = OpenRouterClient(api_key)
306
+ status.append("βœ… API key configured")
307
+ except Exception as e:
308
+ return f"❌ API Error: {str(e)}", None
309
+
310
+ # Load knowledge base
311
+ token = hf_token.strip() if hf_token else None
312
+ success, message = kb.load_from_huggingface(HF_DATASET_REPO, token)
313
+
314
+ if not success:
315
+ return f"βœ… API key configured\n{message}", None
316
+
317
+ status.append(message)
318
+
319
+ # Create analyzer
320
+ analyzer = OrphanPageAnalyzer(kb, client)
321
+ status.append("βœ… System ready!")
322
+
323
+ return "\n".join(status), None
324
 
325
def analyze_orphan(orphan_url, num_sources):
    """Run the analyzer for one orphan page URL.

    Args:
        orphan_url: URL typed by the user (may be None/blank).
        num_sources: Number of source pages requested, taken from the slider.

    Returns:
        ``(report, table)`` as returned by ``analyzer.analyze`` on success,
        otherwise ``(error_message, None)``.
    """
    # Explicit None check: setup() leaves `analyzer` as None until it succeeds.
    if analyzer is None:
        return "❌ Please complete setup first", None

    # Validate and forward the same normalised value.
    url = orphan_url.strip() if orphan_url else ""
    if not url:
        return "❌ Please enter an orphan page URL", None

    # Broad catch is deliberate: this is the UI boundary, so failures are
    # reported in the report pane instead of raising.
    try:
        # NOTE(review): the slider may deliver a float (e.g. 3.0); coerce so
        # downstream code gets a real int. Confirm against Gradio behaviour.
        report, table = analyzer.analyze(url, int(num_sources))
        return report, table
    except Exception as e:
        return f"❌ Error: {str(e)}", None
339
 
340
+ # ============================================
341
+ # INTERFACE
342
+ # ============================================
343
# Build the Gradio UI. Components and event wiring must be created inside the
# Blocks context so they are attached to `app`.
with gr.Blocks(title="Edstellar Internal Linking Tool", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🔗 Edstellar Internal Linking Tool")
    gr.Markdown("Enter an orphan page URL to get instant internal linking recommendations")

    # Setup Section
    # NOTE(review): nesting reconstructed from a flattened diff; the accordion
    # is assumed to end after the status box - confirm against the real file.
    with gr.Accordion("⚙️ Setup (Click to expand - Do this once)", open=True):
        gr.Markdown("### Step 1: Configure API Keys")

        with gr.Row():
            api_key = gr.Textbox(
                label="OpenRouter API Key",
                placeholder="sk-or-v1-...",
                type="password",
                scale=2
            )
            hf_token = gr.Textbox(
                label="Hugging Face Token (optional)",
                placeholder="hf_...",
                type="password",
                scale=2
            )

        setup_btn = gr.Button("🚀 Setup System", variant="primary", size="lg")
        setup_status = gr.Textbox(label="Setup Status", lines=3, interactive=False)

    gr.Markdown("---")

    # Analysis Section
    gr.Markdown("### 📊 Analyze Orphan Page")

    with gr.Row():
        orphan_url_input = gr.Textbox(
            label="Orphan Page URL",
            placeholder="https://edstellar.com/blog/your-orphan-page",
            scale=3
        )
        num_sources_input = gr.Slider(
            label="Number of Sources",
            minimum=3,
            maximum=5,
            value=3,
            step=1,
            scale=1
        )

    analyze_btn = gr.Button("🔍 Analyze & Generate Report", variant="primary", size="lg")

    gr.Markdown("---")

    # Results Section
    gr.Markdown("### 📄 Report")

    report_output = gr.Markdown()

    gr.Markdown("### 📊 Summary Table")
    table_output = gr.Dataframe(
        label="Quick Overview",
        wrap=True,
        interactive=False
    )

    # Wire up events
    # setup() returns (status, None); the None goes to the summary table output.
    setup_btn.click(
        setup,
        inputs=[api_key, hf_token],
        outputs=[setup_status, table_output]
    )

    analyze_btn.click(
        analyze_orphan,
        inputs=[orphan_url_input, num_sources_input],
        outputs=[report_output, table_output]
    )

# Launch
if __name__ == "__main__":
    app.launch()