Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -539,7 +539,6 @@ async def scrape(request: ScrapeRequest):
|
|
| 539 |
print(f"[Scraper] Endpoint error: {e}")
|
| 540 |
return ScrapeResponse(platform=platform, url=url, scrape_success=False)
|
| 541 |
|
| 542 |
-
|
| 543 |
# ============================================================
|
| 544 |
# LISTING PAGE CRAWLERS — for discovery / public_hackathons
|
| 545 |
# ============================================================
|
|
@@ -594,16 +593,10 @@ DEVFOLIO_EXTRACT = """() => {
|
|
| 594 |
if (!name || name.length < 3) return;
|
| 595 |
|
| 596 |
// Walk up to the card container to find banner and other data
|
| 597 |
-
|
| 598 |
-
for(let i=0; i<6; i++) {
|
| 599 |
-
if(container.querySelector('img')) break;
|
| 600 |
-
if(container.parentElement && container.parentElement.tagName !== 'BODY') {
|
| 601 |
-
container = container.parentElement;
|
| 602 |
-
}
|
| 603 |
-
}
|
| 604 |
|
| 605 |
const imgEl = container.querySelector('img') || card.querySelector('img');
|
| 606 |
-
const banner = imgEl ? (imgEl.
|
| 607 |
|
| 608 |
const descEl = container.querySelector('p') || card.querySelector('p');
|
| 609 |
const description = descEl ? descEl.textContent.trim().substring(0, 500) : '';
|
|
@@ -612,7 +605,7 @@ DEVFOLIO_EXTRACT = """() => {
|
|
| 612 |
|
| 613 |
// Extract prize
|
| 614 |
let prize = '';
|
| 615 |
-
const prizeMatch = allText.match(/
|
| 616 |
if (prizeMatch) prize = prizeMatch[0].trim();
|
| 617 |
|
| 618 |
// Extract dates like "Mar 25 - 27, 2026" or "Runs from ..."
|
|
@@ -738,12 +731,12 @@ UNSTOP_EXTRACT = """() => {
|
|
| 738 |
if (!name || name.length < 3) return;
|
| 739 |
|
| 740 |
const imgEl = card.querySelector('img');
|
| 741 |
-
const banner = imgEl ? (imgEl.
|
| 742 |
|
| 743 |
const allText = card.textContent || '';
|
| 744 |
|
| 745 |
let prize = '';
|
| 746 |
-
const prizeMatch = allText.match(/(?:₹|INR|Rs\.?
|
| 747 |
if (prizeMatch) prize = prizeMatch[0].trim();
|
| 748 |
|
| 749 |
const tags = [];
|
|
@@ -898,4 +891,4 @@ async def crawl_all():
|
|
| 898 |
"unstop": len(unstop),
|
| 899 |
},
|
| 900 |
"hackathons": all_results,
|
| 901 |
-
}
|
|
|
|
| 539 |
print(f"[Scraper] Endpoint error: {e}")
|
| 540 |
return ScrapeResponse(platform=platform, url=url, scrape_success=False)
|
| 541 |
|
|
|
|
| 542 |
# ============================================================
|
| 543 |
# LISTING PAGE CRAWLERS — for discovery / public_hackathons
|
| 544 |
# ============================================================
|
|
|
|
| 593 |
if (!name || name.length < 3) return;
|
| 594 |
|
| 595 |
// Walk up to the card container to find banner and other data
|
| 596 |
+
const container = card.closest('div') || card.parentElement?.closest('div') || card;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
|
| 598 |
const imgEl = container.querySelector('img') || card.querySelector('img');
|
| 599 |
+
const banner = imgEl ? (imgEl.src || imgEl.getAttribute('data-src') || '') : '';
|
| 600 |
|
| 601 |
const descEl = container.querySelector('p') || card.querySelector('p');
|
| 602 |
const description = descEl ? descEl.textContent.trim().substring(0, 500) : '';
|
|
|
|
| 605 |
|
| 606 |
// Extract prize
|
| 607 |
let prize = '';
|
| 608 |
+
const prizeMatch = allText.match(/[\u20B9$\u20AC\u00A3]\s*[\d,]+(?:\.\d+)?(?:\s*(?:Lakhs?|Lacs?|Crores?|K|k|L|M))?/);
|
| 609 |
if (prizeMatch) prize = prizeMatch[0].trim();
|
| 610 |
|
| 611 |
// Extract dates like "Mar 25 - 27, 2026" or "Runs from ..."
|
|
|
|
| 731 |
if (!name || name.length < 3) return;
|
| 732 |
|
| 733 |
const imgEl = card.querySelector('img');
|
| 734 |
+
const banner = imgEl ? (imgEl.src || '') : '';
|
| 735 |
|
| 736 |
const allText = card.textContent || '';
|
| 737 |
|
| 738 |
let prize = '';
|
| 739 |
+
const prizeMatch = allText.match(/(?:₹|INR|Rs\\.?)\\s*[\\d,]+(?:\\.\\d+)?(?:\\s*(?:Lakhs?|Lacs?|Crores?|K|k|L))?/i);
|
| 740 |
if (prizeMatch) prize = prizeMatch[0].trim();
|
| 741 |
|
| 742 |
const tags = [];
|
|
|
|
| 891 |
"unstop": len(unstop),
|
| 892 |
},
|
| 893 |
"hackathons": all_results,
|
| 894 |
+
}
|