vijaykumaredstellar committed on
Commit
8feb880
·
verified ·
1 Parent(s): c575e99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -31
app.py CHANGED
@@ -17,6 +17,19 @@ CHAT_MODEL = "deepseek/deepseek-chat"
17
  TOP_K_CANDIDATES = 15
18
  TOP_N_SOURCES = 3
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # ============================================
21
  # KNOWLEDGE BASE LOADER
22
  # ============================================
@@ -71,7 +84,7 @@ class KnowledgeBase:
71
  return results
72
 
73
  # ============================================
74
- # OPENROUTER CLIENT (FIXED)
75
  # ============================================
76
  class OpenRouterClient:
77
  def __init__(self, api_key):
@@ -263,7 +276,7 @@ Link details:
263
  - Target page: {orphan_title}
264
  - Target URL: {orphan_url}
265
 
266
- Provide the modified sentence with the anchor text naturally integrated. Keep the modification minimal and natural. Provide ONLY the modified sentence."""
267
 
268
  modified_text = self.client.chat([
269
  {"role": "user", "content": prompt}
@@ -292,7 +305,7 @@ stage2 = None
292
  stage3 = None
293
 
294
  # ============================================
295
- # GRADIO INTERFACE
296
  # ============================================
297
  def setup_api_key(api_key):
298
  """Initialize OpenRouter client"""
@@ -328,28 +341,39 @@ def run_stage1(orphan_url, orphan_title, orphan_keyword, orphan_category):
328
  return "❌ Please provide at least URL and Title", None, None
329
 
330
  try:
 
 
 
 
 
331
  all_candidates, top_3 = stage1.analyze(
332
  orphan_url, orphan_title, orphan_keyword, orphan_category
333
  )
334
 
 
 
 
 
 
 
335
  # Format for display
336
  df_all = pd.DataFrame(all_candidates)[['url', 'title', 'score', 'similarity', 'opportunities']]
337
  df_top3 = pd.DataFrame(top_3)[['url', 'title', 'score']]
338
 
339
- return "βœ… Stage 1 complete!", df_all, df_top3
340
  except Exception as e:
341
  return f"❌ Error: {str(e)}", None, None
342
 
343
  def run_stage2(orphan_url, orphan_title, orphan_keyword, selected_urls_text):
344
  """Run Stage 2 analysis"""
345
  if not stage2:
346
- return "❌ Please configure your API key first!", None
347
 
348
  # Parse selected URLs
349
  selected_urls = [url.strip() for url in selected_urls_text.split('\n') if url.strip()]
350
 
351
  if len(selected_urls) != 3:
352
- return f"❌ Please provide exactly 3 URLs (you provided {len(selected_urls)})", None
353
 
354
  # Get source details from KB
355
  selected_sources = []
@@ -362,11 +386,19 @@ def run_stage2(orphan_url, orphan_title, orphan_keyword, selected_urls_text):
362
  })
363
 
364
  if len(selected_sources) != 3:
365
- return f"❌ Some URLs not found in knowledge base", None
366
 
367
  try:
 
 
 
 
 
368
  placements = stage2.analyze(orphan_url, orphan_title, orphan_keyword, selected_sources)
369
 
 
 
 
370
  # Format for display
371
  df = pd.DataFrame([{
372
  'Source URL': p['source_url'],
@@ -377,14 +409,99 @@ def run_stage2(orphan_url, orphan_title, orphan_keyword, selected_urls_text):
377
  'Current Text (preview)': p['current_text'][:100] + '...'
378
  } for p in placements])
379
 
380
- return "βœ… Stage 2 complete!", df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  except Exception as e:
382
- return f"❌ Error: {str(e)}", None
383
 
384
  # ============================================
385
  # BUILD INTERFACE
386
  # ============================================
387
- with gr.Blocks(title="Edstellar Internal Linking Tool") as app:
388
  gr.Markdown("# πŸ”— Edstellar Internal Linking RAG Tool")
389
  gr.Markdown("AI-powered 3-stage analysis to find optimal internal linking opportunities for orphan pages")
390
 
@@ -397,12 +514,13 @@ with gr.Blocks(title="Edstellar Internal Linking Tool") as app:
397
  placeholder="sk-or-v1-...",
398
  type="password"
399
  )
400
- api_setup_btn = gr.Button("Configure API Key")
401
 
402
  api_status = gr.Textbox(label="Status", interactive=False)
403
 
404
  gr.Markdown("---")
405
  gr.Markdown("## Step 2: Load Knowledge Base")
 
406
 
407
  with gr.Row():
408
  hf_token_input = gr.Textbox(
@@ -410,68 +528,153 @@ with gr.Blocks(title="Edstellar Internal Linking Tool") as app:
410
  placeholder="hf_...",
411
  type="password"
412
  )
413
- kb_load_btn = gr.Button("Load Knowledge Base")
414
 
415
  kb_status = gr.Textbox(label="Status", interactive=False)
416
 
417
  with gr.Tab("πŸ“Š Stage 1: Find Source Pages"):
418
  gr.Markdown("## Identify Top 15 Candidates β†’ Select Best 3")
 
419
 
420
  with gr.Row():
421
  with gr.Column():
422
- s1_orphan_url = gr.Textbox(label="Orphan Page URL", placeholder="https://edstellar.com/blog/...")
423
- s1_orphan_title = gr.Textbox(label="Orphan Page Title", placeholder="Employee Training Tips")
424
- s1_orphan_keyword = gr.Textbox(label="Primary Keyword", placeholder="employee training")
425
- s1_orphan_category = gr.Textbox(label="Category", placeholder="Learning & Development")
426
- s1_analyze_btn = gr.Button("πŸ” Find Source Pages", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
427
 
428
  with gr.Column():
429
- s1_status = gr.Textbox(label="Status")
430
 
431
  gr.Markdown("### πŸ“‹ All Candidates (Top 15)")
432
- s1_all_candidates = gr.Dataframe(label="All Candidates", interactive=False)
 
 
 
 
433
 
434
  gr.Markdown("### ⭐ Recommended Top 3")
435
- s1_top3 = gr.Dataframe(label="Top 3 Sources", interactive=False)
 
 
 
 
436
 
437
  with gr.Tab("πŸ“ Stage 2: Find Placements"):
438
  gr.Markdown("## Identify Exact Link Placement Locations")
 
439
 
440
  with gr.Row():
441
  with gr.Column():
442
- s2_orphan_url = gr.Textbox(label="Orphan Page URL")
443
- s2_orphan_title = gr.Textbox(label="Orphan Page Title")
444
- s2_orphan_keyword = gr.Textbox(label="Primary Keyword")
 
 
 
 
 
 
 
 
 
445
  s2_selected_urls = gr.Textbox(
446
  label="Selected 3 URLs (one per line)",
447
  placeholder="https://edstellar.com/blog/page1\nhttps://edstellar.com/blog/page2\nhttps://edstellar.com/blog/page3",
448
  lines=4
449
  )
450
- s2_analyze_btn = gr.Button("🎯 Find Placements", variant="primary")
451
 
452
  with gr.Column():
453
- s2_status = gr.Textbox(label="Status")
 
 
 
 
 
 
454
 
455
- s2_placements = gr.Dataframe(label="Placement Recommendations", interactive=False)
 
 
 
456
 
457
  with gr.Tab("πŸ“„ Stage 3: Implementation Report"):
458
  gr.Markdown("## Generate Ready-to-Use HTML Code")
459
- gr.Markdown("*Coming soon: Full implementation report with copy-paste HTML*")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
 
461
  # Wire up events
462
- api_setup_btn.click(setup_api_key, inputs=[api_key_input], outputs=[api_status])
463
- kb_load_btn.click(load_kb, inputs=[hf_token_input], outputs=[kb_status])
 
 
 
 
 
 
 
 
 
 
464
  s1_analyze_btn.click(
465
  run_stage1,
466
  inputs=[s1_orphan_url, s1_orphan_title, s1_orphan_keyword, s1_orphan_category],
467
  outputs=[s1_status, s1_all_candidates, s1_top3]
468
  )
 
469
  s2_analyze_btn.click(
470
  run_stage2,
471
  inputs=[s2_orphan_url, s2_orphan_title, s2_orphan_keyword, s2_selected_urls],
472
- outputs=[s2_status, s2_placements]
 
 
 
 
 
 
473
  )
474
 
475
  # Launch
476
  if __name__ == "__main__":
477
- app.launch()
 
17
  TOP_K_CANDIDATES = 15
18
  TOP_N_SOURCES = 3
19
 
20
+ # ============================================
21
+ # GLOBAL STATE FOR DATA PASSING
22
+ # ============================================
23
class SessionState:
    """Mutable holder for data handed from one analysis stage to the next.

    Every field starts as ``None``; the stage callbacks in this file assign
    the fields as each stage runs.
    """

    def __init__(self):
        # Outputs of Stage 1 and Stage 2; nothing has been run yet.
        self.stage1_results = self.stage2_results = None
        # Details of the orphan page from the most recent stage run.
        self.current_orphan_url = None
        self.current_orphan_title = None
        self.current_orphan_keyword = None
30
+
31
+ session = SessionState()
32
+
33
  # ============================================
34
  # KNOWLEDGE BASE LOADER
35
  # ============================================
 
84
  return results
85
 
86
  # ============================================
87
+ # OPENROUTER CLIENT
88
  # ============================================
89
  class OpenRouterClient:
90
  def __init__(self, api_key):
 
276
  - Target page: {orphan_title}
277
  - Target URL: {orphan_url}
278
 
279
+ Provide the modified sentence with the anchor text naturally integrated. Keep the modification minimal and natural. Provide ONLY the modified sentence, nothing else."""
280
 
281
  modified_text = self.client.chat([
282
  {"role": "user", "content": prompt}
 
305
  stage3 = None
306
 
307
  # ============================================
308
+ # GRADIO INTERFACE FUNCTIONS
309
  # ============================================
310
  def setup_api_key(api_key):
311
  """Initialize OpenRouter client"""
 
341
  return "❌ Please provide at least URL and Title", None, None
342
 
343
  try:
344
+ # Store in session
345
+ session.current_orphan_url = orphan_url
346
+ session.current_orphan_title = orphan_title
347
+ session.current_orphan_keyword = orphan_keyword
348
+
349
  all_candidates, top_3 = stage1.analyze(
350
  orphan_url, orphan_title, orphan_keyword, orphan_category
351
  )
352
 
353
+ # Store results
354
+ session.stage1_results = {
355
+ 'all_candidates': all_candidates,
356
+ 'top_3': top_3
357
+ }
358
+
359
  # Format for display
360
  df_all = pd.DataFrame(all_candidates)[['url', 'title', 'score', 'similarity', 'opportunities']]
361
  df_top3 = pd.DataFrame(top_3)[['url', 'title', 'score']]
362
 
363
+ return "βœ… Stage 1 complete! Proceed to Stage 2.", df_all, df_top3
364
  except Exception as e:
365
  return f"❌ Error: {str(e)}", None, None
366
 
367
  def run_stage2(orphan_url, orphan_title, orphan_keyword, selected_urls_text):
368
  """Run Stage 2 analysis"""
369
  if not stage2:
370
+ return "❌ Please configure your API key first!", None, gr.update(visible=False)
371
 
372
  # Parse selected URLs
373
  selected_urls = [url.strip() for url in selected_urls_text.split('\n') if url.strip()]
374
 
375
  if len(selected_urls) != 3:
376
+ return f"❌ Please provide exactly 3 URLs (you provided {len(selected_urls)})", None, gr.update(visible=False)
377
 
378
  # Get source details from KB
379
  selected_sources = []
 
386
  })
387
 
388
  if len(selected_sources) != 3:
389
+ return f"❌ Some URLs not found in knowledge base", None, gr.update(visible=False)
390
 
391
  try:
392
+ # Update session
393
+ session.current_orphan_url = orphan_url
394
+ session.current_orphan_title = orphan_title
395
+ session.current_orphan_keyword = orphan_keyword
396
+
397
  placements = stage2.analyze(orphan_url, orphan_title, orphan_keyword, selected_sources)
398
 
399
+ # Store in session for Stage 3
400
+ session.stage2_results = placements
401
+
402
  # Format for display
403
  df = pd.DataFrame([{
404
  'Source URL': p['source_url'],
 
409
  'Current Text (preview)': p['current_text'][:100] + '...'
410
  } for p in placements])
411
 
412
+ return "βœ… Stage 2 complete! Click 'Stage 3' tab to generate implementation report.", df, gr.update(visible=True)
413
+ except Exception as e:
414
+ return f"❌ Error: {str(e)}", None, gr.update(visible=False)
415
+
416
def _preview(text, limit):
    """Return ``text`` cut to ``limit`` characters, adding '...' only if cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + '...'


def _build_summary_markdown(orphan_title, orphan_url, implementations):
    """Render the Markdown summary shown above the implementation table."""
    total = len(implementations)
    avg_score = sum(impl['score'] for impl in implementations) // total

    # Measure anchor diversity instead of asserting it: duplicates are
    # detected case-insensitively and ignoring surrounding whitespace.
    unique_anchors = len(
        {str(impl['anchor_text']).strip().lower() for impl in implementations}
    )
    if unique_anchors == total:
        diversity = "**Excellent** (all unique)"
    else:
        diversity = f"**Needs review** ({unique_anchors} unique of {total})"

    return "\n".join([
        "",
        "### 📊 Implementation Summary",
        "",
        f"**Orphan Page:** {orphan_title}",
        f"**Target URL:** {orphan_url}",
        "",
        "**Statistics:**",
        f"- ✅ Total links to implement: **{total}**",
        f"- 📈 Average placement score: **{avg_score}/100**",
        f"- 🎯 Anchor text diversity: {diversity}",
        f"- 🔗 Total backlinks created: **{total} unique inbound links**",
        "",
        "**Next Steps:**",
        "1. Review the implementation table below",
        "2. Copy the HTML code snippets",
        "3. Navigate to each source page in Webflow",
        "4. Replace the current text with the HTML code",
        "5. Publish changes",
        "",
    ])


def _build_html_guide(orphan_url, implementations):
    """Render one plain-text instruction section per link, joined by newlines."""
    rule = '=' * 80
    total = len(implementations)
    sections = []
    for position, impl in enumerate(implementations, start=1):
        sections.append("\n".join([
            "",
            rule,
            f"LINK {position} of {total}",
            rule,
            "",
            f"SOURCE PAGE: {impl['source_title']}",
            f"URL: {impl['source_url']}",
            f"PARAGRAPH #: {impl['paragraph_index']}",
            f"PLACEMENT SCORE: {impl['score']}/100",
            "",
            "---",
            "CURRENT TEXT (FIND THIS IN WEBFLOW):",
            "---",
            # The user searches for this text, so only mark it as truncated
            # when it really was.
            _preview(impl['current_text'], 300),
            "",
            "---",
            "REPLACE WITH THIS HTML CODE:",
            "---",
            f"{impl['html_code']}",
            "",
            "---",
            f"ANCHOR TEXT: \"{impl['anchor_text']}\"",
            f"TARGET URL: {orphan_url}",
            "---",
            "",
            "",
        ]))
    return "\n".join(sections)


def run_stage3():
    """Run Stage 3 report generation using the data stored by Stage 2.

    Reads the module-level ``stage3`` generator and ``session`` state; takes
    no arguments because the UI wires this callback with no inputs.

    Returns:
        A 4-tuple ``(status, summary_markdown, table, html_guide)``. On any
        failure the status starts with "❌" and the remaining items are
        ``""``, ``None`` and ``""``. On success ``table`` is a pandas
        DataFrame with one row per implementation.
    """
    if not stage3:
        return "❌ Please configure your API key first!", "", None, ""

    if not session.stage2_results:
        return "❌ Please complete Stage 2 first!", "", None, ""

    try:
        # Generate implementations using stored data.
        # NOTE(review): assumed to return a list of dicts carrying the keys
        # read below (score, source_title, anchor_text, ...) - confirm
        # against the Stage 3 class.
        implementations = stage3.generate(
            session.current_orphan_url,
            session.current_orphan_title,
            session.stage2_results
        )

        # Without this guard the average below divides by zero and the user
        # only sees "integer division or modulo by zero".
        if not implementations:
            return "❌ Stage 3 produced no implementations. Please re-run Stage 2.", "", None, ""

        summary_md = _build_summary_markdown(
            session.current_orphan_title,
            session.current_orphan_url,
            implementations
        )

        # Format table
        df = pd.DataFrame([{
            'Source Page': impl['source_title'][:40],
            'Para #': impl['paragraph_index'],
            'Anchor Text': impl['anchor_text'],
            'Score': impl['score'],
            'Current Text (first 80 chars)': _preview(impl['current_text'], 80),
            'Modified Text (first 80 chars)': _preview(impl['modified_text'], 80)
        } for impl in implementations])

        # Format HTML output with detailed instructions
        html_output = _build_html_guide(session.current_orphan_url, implementations)

        return "✅ Stage 3 complete! Review and implement the suggestions below.", summary_md, df, html_output

    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting the callback crash.
        return f"❌ Error: {str(e)}", "", None, ""
500
 
501
  # ============================================
502
  # BUILD INTERFACE
503
  # ============================================
504
+ with gr.Blocks(title="Edstellar Internal Linking Tool", theme=gr.themes.Soft()) as app:
505
  gr.Markdown("# πŸ”— Edstellar Internal Linking RAG Tool")
506
  gr.Markdown("AI-powered 3-stage analysis to find optimal internal linking opportunities for orphan pages")
507
 
 
514
  placeholder="sk-or-v1-...",
515
  type="password"
516
  )
517
+ api_setup_btn = gr.Button("Configure API Key", variant="primary")
518
 
519
  api_status = gr.Textbox(label="Status", interactive=False)
520
 
521
  gr.Markdown("---")
522
  gr.Markdown("## Step 2: Load Knowledge Base")
523
+ gr.Markdown("*This loads your pre-built knowledge base with 523 searchable blog paragraphs*")
524
 
525
  with gr.Row():
526
  hf_token_input = gr.Textbox(
 
528
  placeholder="hf_...",
529
  type="password"
530
  )
531
+ kb_load_btn = gr.Button("Load Knowledge Base", variant="primary")
532
 
533
  kb_status = gr.Textbox(label="Status", interactive=False)
534
 
535
  with gr.Tab("πŸ“Š Stage 1: Find Source Pages"):
536
  gr.Markdown("## Identify Top 15 Candidates β†’ Select Best 3")
537
+ gr.Markdown("Enter your orphan page details to find the best source pages for internal links")
538
 
539
  with gr.Row():
540
  with gr.Column():
541
+ s1_orphan_url = gr.Textbox(
542
+ label="Orphan Page URL",
543
+ placeholder="https://edstellar.com/blog/employee-training-tips"
544
+ )
545
+ s1_orphan_title = gr.Textbox(
546
+ label="Orphan Page Title",
547
+ placeholder="Employee Training Tips"
548
+ )
549
+ s1_orphan_keyword = gr.Textbox(
550
+ label="Primary Keyword",
551
+ placeholder="employee training"
552
+ )
553
+ s1_orphan_category = gr.Textbox(
554
+ label="Category",
555
+ placeholder="Learning & Development"
556
+ )
557
+ s1_analyze_btn = gr.Button("πŸ” Find Source Pages", variant="primary", size="lg")
558
 
559
  with gr.Column():
560
+ s1_status = gr.Textbox(label="Status", lines=3)
561
 
562
  gr.Markdown("### πŸ“‹ All Candidates (Top 15)")
563
+ s1_all_candidates = gr.Dataframe(
564
+ label="All Candidates",
565
+ interactive=False,
566
+ wrap=True
567
+ )
568
 
569
  gr.Markdown("### ⭐ Recommended Top 3")
570
+ gr.Markdown("*These are automatically selected based on relevance, category match, and linking potential*")
571
+ s1_top3 = gr.Dataframe(
572
+ label="Top 3 Sources",
573
+ interactive=False
574
+ )
575
 
576
  with gr.Tab("πŸ“ Stage 2: Find Placements"):
577
  gr.Markdown("## Identify Exact Link Placement Locations")
578
+ gr.Markdown("Paste 3 source URLs (from Stage 1) to find optimal paragraph placements")
579
 
580
  with gr.Row():
581
  with gr.Column():
582
+ s2_orphan_url = gr.Textbox(
583
+ label="Orphan Page URL",
584
+ placeholder="(Copy from Stage 1)"
585
+ )
586
+ s2_orphan_title = gr.Textbox(
587
+ label="Orphan Page Title",
588
+ placeholder="(Copy from Stage 1)"
589
+ )
590
+ s2_orphan_keyword = gr.Textbox(
591
+ label="Primary Keyword",
592
+ placeholder="(Copy from Stage 1)"
593
+ )
594
  s2_selected_urls = gr.Textbox(
595
  label="Selected 3 URLs (one per line)",
596
  placeholder="https://edstellar.com/blog/page1\nhttps://edstellar.com/blog/page2\nhttps://edstellar.com/blog/page3",
597
  lines=4
598
  )
599
+ s2_analyze_btn = gr.Button("🎯 Find Placements", variant="primary", size="lg")
600
 
601
  with gr.Column():
602
+ s2_status = gr.Textbox(label="Status", lines=5)
603
+
604
+ s2_placements = gr.Dataframe(
605
+ label="Placement Recommendations",
606
+ interactive=False,
607
+ wrap=True
608
+ )
609
 
610
+ s2_proceed_notice = gr.Markdown(
611
+ "βœ… **Data saved!** Click the **Stage 3** tab to generate implementation report.",
612
+ visible=False
613
+ )
614
 
615
  with gr.Tab("πŸ“„ Stage 3: Implementation Report"):
616
  gr.Markdown("## Generate Ready-to-Use HTML Code")
617
+ gr.Markdown("Automatically generates implementation guide using results from Stage 2")
618
+
619
+ gr.Markdown("### ⚑ Quick Start")
620
+ gr.Markdown("Click the button below to generate your implementation report. No manual input needed!")
621
+
622
+ s3_generate_btn = gr.Button(
623
+ "πŸ“‹ Generate Implementation Report",
624
+ variant="primary",
625
+ size="lg"
626
+ )
627
+
628
+ s3_status = gr.Textbox(label="Status", lines=2)
629
+
630
+ s3_summary = gr.Markdown()
631
+
632
+ gr.Markdown("### πŸ“Š Implementation Table")
633
+ s3_report = gr.Dataframe(
634
+ label="Detailed Recommendations",
635
+ interactive=False,
636
+ wrap=True
637
+ )
638
+
639
+ gr.Markdown("### πŸ’» HTML Code Snippets")
640
+ gr.Markdown("Copy each section and paste into the corresponding Webflow page")
641
+ s3_html_output = gr.Code(
642
+ label="Copy-Paste Ready Implementation Guide",
643
+ language="html",
644
+ lines=20
645
+ )
646
 
647
  # Wire up events
648
+ api_setup_btn.click(
649
+ setup_api_key,
650
+ inputs=[api_key_input],
651
+ outputs=[api_status]
652
+ )
653
+
654
+ kb_load_btn.click(
655
+ load_kb,
656
+ inputs=[hf_token_input],
657
+ outputs=[kb_status]
658
+ )
659
+
660
  s1_analyze_btn.click(
661
  run_stage1,
662
  inputs=[s1_orphan_url, s1_orphan_title, s1_orphan_keyword, s1_orphan_category],
663
  outputs=[s1_status, s1_all_candidates, s1_top3]
664
  )
665
+
666
  s2_analyze_btn.click(
667
  run_stage2,
668
  inputs=[s2_orphan_url, s2_orphan_title, s2_orphan_keyword, s2_selected_urls],
669
+ outputs=[s2_status, s2_placements, s2_proceed_notice]
670
+ )
671
+
672
+ s3_generate_btn.click(
673
+ run_stage3,
674
+ inputs=[], # No inputs needed - uses session data
675
+ outputs=[s3_status, s3_summary, s3_report, s3_html_output]
676
  )
677
 
678
  # Launch
679
  if __name__ == "__main__":
680
+ app.launch()