Rudraaaa76 committed on
Commit
1671e25
·
verified ·
1 Parent(s): 370bd62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -539,7 +539,6 @@ async def scrape(request: ScrapeRequest):
539
  print(f"[Scraper] Endpoint error: {e}")
540
  return ScrapeResponse(platform=platform, url=url, scrape_success=False)
541
 
542
-
543
  # ============================================================
544
  # LISTING PAGE CRAWLERS — for discovery / public_hackathons
545
  # ============================================================
@@ -594,16 +593,10 @@ DEVFOLIO_EXTRACT = """() => {
594
  if (!name || name.length < 3) return;
595
 
596
  // Walk up to the card container to find banner and other data
597
- let container = card;
598
- for(let i=0; i<6; i++) {
599
- if(container.querySelector('img')) break;
600
- if(container.parentElement && container.parentElement.tagName !== 'BODY') {
601
- container = container.parentElement;
602
- }
603
- }
604
 
605
  const imgEl = container.querySelector('img') || card.querySelector('img');
606
- const banner = imgEl ? (imgEl.getAttribute('src') || imgEl.getAttribute('data-src') || imgEl.src || '') : '';
607
 
608
  const descEl = container.querySelector('p') || card.querySelector('p');
609
  const description = descEl ? descEl.textContent.trim().substring(0, 500) : '';
@@ -612,7 +605,7 @@ DEVFOLIO_EXTRACT = """() => {
612
 
613
  // Extract prize
614
  let prize = '';
615
- const prizeMatch = allText.match(/(?:[\u20B9$\u20AC\u00A3]|INR|USD|Rs\.?)\s*[\d,]+(?:\.\d+)?(?:\s*(?:Lakhs?|Lacs?|Crores?|K|k|L|M))?/i);
616
  if (prizeMatch) prize = prizeMatch[0].trim();
617
 
618
  // Extract dates like "Mar 25 - 27, 2026" or "Runs from ..."
@@ -738,12 +731,12 @@ UNSTOP_EXTRACT = """() => {
738
  if (!name || name.length < 3) return;
739
 
740
  const imgEl = card.querySelector('img');
741
- const banner = imgEl ? (imgEl.getAttribute('src') || imgEl.getAttribute('data-src') || imgEl.src || '') : '';
742
 
743
  const allText = card.textContent || '';
744
 
745
  let prize = '';
746
- const prizeMatch = allText.match(/(?:₹|INR|Rs\.?|\$|USD)\s*[\d,]+(?:\.\d+)?(?:\s*(?:Lakhs?|Lacs?|Crores?|K|k|L|M))?/i);
747
  if (prizeMatch) prize = prizeMatch[0].trim();
748
 
749
  const tags = [];
@@ -898,4 +891,4 @@ async def crawl_all():
898
  "unstop": len(unstop),
899
  },
900
  "hackathons": all_results,
901
- }
 
539
  print(f"[Scraper] Endpoint error: {e}")
540
  return ScrapeResponse(platform=platform, url=url, scrape_success=False)
541
 
 
542
  # ============================================================
543
  # LISTING PAGE CRAWLERS — for discovery / public_hackathons
544
  # ============================================================
 
593
  if (!name || name.length < 3) return;
594
 
595
  // Walk up to the card container to find banner and other data
596
+ const container = card.closest('div') || card.parentElement?.closest('div') || card;
 
 
 
 
 
 
597
 
598
  const imgEl = container.querySelector('img') || card.querySelector('img');
599
+ const banner = imgEl ? (imgEl.src || imgEl.getAttribute('data-src') || '') : '';
600
 
601
  const descEl = container.querySelector('p') || card.querySelector('p');
602
  const description = descEl ? descEl.textContent.trim().substring(0, 500) : '';
 
605
 
606
  // Extract prize
607
  let prize = '';
608
+ const prizeMatch = allText.match(/[\u20B9$\u20AC\u00A3]\s*[\d,]+(?:\.\d+)?(?:\s*(?:Lakhs?|Lacs?|Crores?|K|k|L|M))?/);
609
  if (prizeMatch) prize = prizeMatch[0].trim();
610
 
611
  // Extract dates like "Mar 25 - 27, 2026" or "Runs from ..."
 
731
  if (!name || name.length < 3) return;
732
 
733
  const imgEl = card.querySelector('img');
734
+ const banner = imgEl ? (imgEl.src || '') : '';
735
 
736
  const allText = card.textContent || '';
737
 
738
  let prize = '';
739
+ const prizeMatch = allText.match(/(?:₹|INR|Rs\\.?)\\s*[\\d,]+(?:\\.\\d+)?(?:\\s*(?:Lakhs?|Lacs?|Crores?|K|k|L))?/i);
740
  if (prizeMatch) prize = prizeMatch[0].trim();
741
 
742
  const tags = [];
 
891
  "unstop": len(unstop),
892
  },
893
  "hackathons": all_results,
894
+ }