Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# ==============================================
|
| 2 |
# SCREENSHOT SCRAPER FOR N8N
|
| 3 |
-
# Complete version with all improvements - GRADIO
|
| 4 |
# ==============================================
|
| 5 |
|
| 6 |
import gradio as gr
|
|
@@ -558,7 +558,8 @@ scraper = ScreenshotScraper()
|
|
| 558 |
# FASTAPI APPLICATION
|
| 559 |
# ==============================================
|
| 560 |
|
| 561 |
-
app = FastAPI(
|
|
|
|
| 562 |
title="Screenshot Scraper API",
|
| 563 |
description="AI-powered web scraper that takes screenshots and extracts text using OCR",
|
| 564 |
version="2.0.0"
|
|
@@ -566,7 +567,7 @@ app = FastAPI(
|
|
| 566 |
|
| 567 |
# CORS middleware
|
| 568 |
from fastapi.middleware.cors import CORSMiddleware
|
| 569 |
-
app.add_middleware(
|
| 570 |
CORSMiddleware,
|
| 571 |
allow_origins=["*"],
|
| 572 |
allow_credentials=True,
|
|
@@ -574,7 +575,7 @@ app.add_middleware(
|
|
| 574 |
allow_headers=["*"],
|
| 575 |
)
|
| 576 |
|
| 577 |
-
@app.get("/")
|
| 578 |
async def root():
|
| 579 |
"""Root endpoint with API information"""
|
| 580 |
return {
|
|
@@ -593,7 +594,7 @@ async def root():
|
|
| 593 |
}
|
| 594 |
}
|
| 595 |
|
| 596 |
-
@app.get("/health")
|
| 597 |
async def health():
|
| 598 |
"""Health check endpoint"""
|
| 599 |
return {
|
|
@@ -605,7 +606,7 @@ async def health():
|
|
| 605 |
}
|
| 606 |
}
|
| 607 |
|
| 608 |
-
@app.get("/api/info")
|
| 609 |
async def api_info():
|
| 610 |
"""Get system information"""
|
| 611 |
return {
|
|
@@ -619,7 +620,7 @@ async def api_info():
|
|
| 619 |
}
|
| 620 |
}
|
| 621 |
|
| 622 |
-
@app.post("/api/scrape")
|
| 623 |
async def api_scrape(data: dict = None, url: str = None):
|
| 624 |
"""
|
| 625 |
Main scraping endpoint for n8n
|
|
@@ -676,10 +677,10 @@ async def api_scrape(data: dict = None, url: str = None):
|
|
| 676 |
}
|
| 677 |
|
| 678 |
# ==============================================
|
| 679 |
-
# GRADIO INTERFACE
|
| 680 |
# ==============================================
|
| 681 |
|
| 682 |
-
def gradio_scrape(url: str, use_html_fallback: bool = False):
|
| 683 |
"""Gradio interface function"""
|
| 684 |
if not url:
|
| 685 |
return "❌ Please enter a URL", {"error": "No URL provided"}
|
|
@@ -718,22 +719,15 @@ def gradio_scrape(url: str, use_html_fallback: bool = False):
|
|
| 718 |
error_msg = f"## ❌ Unexpected Error\n\n{str(e)}"
|
| 719 |
return error_msg, {"error": str(e), "url": url}
|
| 720 |
|
| 721 |
-
# Create Gradio interface
|
| 722 |
-
iface = gr.Interface(
|
| 723 |
fn=gradio_scrape,
|
| 724 |
-
inputs=[
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
),
|
| 731 |
-
gr.Checkbox(
|
| 732 |
-
label="Use HTML fallback (if available)",
|
| 733 |
-
value=False,
|
| 734 |
-
visible=False # Hidden for now
|
| 735 |
-
)
|
| 736 |
-
],
|
| 737 |
outputs=[
|
| 738 |
gr.Markdown(label="Result"),
|
| 739 |
gr.JSON(label="API Response")
|
|
@@ -753,21 +747,15 @@ iface = gr.Interface(
|
|
| 753 |
["https://en.wikipedia.org/wiki/Artificial_intelligence"],
|
| 754 |
["https://news.ycombinator.com"],
|
| 755 |
["https://httpbin.org/html"]
|
| 756 |
-
],
|
| 757 |
-
theme="soft"
|
| 758 |
-
# Removed allow_flagging parameter for Gradio 4.x compatibility
|
| 759 |
)
|
| 760 |
|
| 761 |
# ==============================================
|
| 762 |
-
# GRADIO
|
| 763 |
# ==============================================
|
| 764 |
|
| 765 |
-
def launch_gradio():
|
| 766 |
-
|
| 767 |
-
# Mount Gradio app to FastAPI
|
| 768 |
-
app = gr.mount_gradio_app(app, iface, path="/")
|
| 769 |
-
|
| 770 |
-
return app
|
| 771 |
|
| 772 |
# ==============================================
|
| 773 |
# APPLICATION STARTUP
|
|
@@ -782,9 +770,6 @@ if __name__ == "__main__":
|
|
| 782 |
print(f"🔧 Default OCR Model: {Config.DEFAULT_MODEL}")
|
| 783 |
print("="*60 + "\n")
|
| 784 |
|
| 785 |
-
# Configure Gradio launch
|
| 786 |
-
app = launch_gradio()
|
| 787 |
-
|
| 788 |
uvicorn.run(
|
| 789 |
app,
|
| 790 |
host="0.0.0.0",
|
|
|
|
| 1 |
# ==============================================
|
| 2 |
# SCREENSHOT SCRAPER FOR N8N
|
| 3 |
+
# Complete version with all improvements - GRADIO COMPATIBLE
|
| 4 |
# ==============================================
|
| 5 |
|
| 6 |
import gradio as gr
|
|
|
|
| 558 |
# FASTAPI APPLICATION
|
| 559 |
# ==============================================
|
| 560 |
|
| 561 |
+
# Create FastAPI app
|
| 562 |
+
fastapi_app = FastAPI(
|
| 563 |
title="Screenshot Scraper API",
|
| 564 |
description="AI-powered web scraper that takes screenshots and extracts text using OCR",
|
| 565 |
version="2.0.0"
|
|
|
|
| 567 |
|
| 568 |
# CORS middleware
|
| 569 |
from fastapi.middleware.cors import CORSMiddleware
|
| 570 |
+
fastapi_app.add_middleware(
|
| 571 |
CORSMiddleware,
|
| 572 |
allow_origins=["*"],
|
| 573 |
allow_credentials=True,
|
|
|
|
| 575 |
allow_headers=["*"],
|
| 576 |
)
|
| 577 |
|
| 578 |
+
@fastapi_app.get("/")
|
| 579 |
async def root():
|
| 580 |
"""Root endpoint with API information"""
|
| 581 |
return {
|
|
|
|
| 594 |
}
|
| 595 |
}
|
| 596 |
|
| 597 |
+
@fastapi_app.get("/health")
|
| 598 |
async def health():
|
| 599 |
"""Health check endpoint"""
|
| 600 |
return {
|
|
|
|
| 606 |
}
|
| 607 |
}
|
| 608 |
|
| 609 |
+
@fastapi_app.get("/api/info")
|
| 610 |
async def api_info():
|
| 611 |
"""Get system information"""
|
| 612 |
return {
|
|
|
|
| 620 |
}
|
| 621 |
}
|
| 622 |
|
| 623 |
+
@fastapi_app.post("/api/scrape")
|
| 624 |
async def api_scrape(data: dict = None, url: str = None):
|
| 625 |
"""
|
| 626 |
Main scraping endpoint for n8n
|
|
|
|
| 677 |
}
|
| 678 |
|
| 679 |
# ==============================================
|
| 680 |
+
# GRADIO INTERFACE
|
| 681 |
# ==============================================
|
| 682 |
|
| 683 |
+
def gradio_scrape(url: str):
|
| 684 |
"""Gradio interface function"""
|
| 685 |
if not url:
|
| 686 |
return "❌ Please enter a URL", {"error": "No URL provided"}
|
|
|
|
| 719 |
error_msg = f"## ❌ Unexpected Error\n\n{str(e)}"
|
| 720 |
return error_msg, {"error": str(e), "url": url}
|
| 721 |
|
| 722 |
+
# Create Gradio interface
|
| 723 |
+
gradio_app = gr.Interface(
|
| 724 |
fn=gradio_scrape,
|
| 725 |
+
inputs=gr.Textbox(
|
| 726 |
+
label="Website URL",
|
| 727 |
+
placeholder="https://example.com",
|
| 728 |
+
value="https://example.com",
|
| 729 |
+
lines=1
|
| 730 |
+
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
outputs=[
|
| 732 |
gr.Markdown(label="Result"),
|
| 733 |
gr.JSON(label="API Response")
|
|
|
|
| 747 |
["https://en.wikipedia.org/wiki/Artificial_intelligence"],
|
| 748 |
["https://news.ycombinator.com"],
|
| 749 |
["https://httpbin.org/html"]
|
| 750 |
+
]
|
|
|
|
|
|
|
| 751 |
)
|
| 752 |
|
| 753 |
# ==============================================
|
| 754 |
+
# MOUNT GRADIO TO FASTAPI
|
| 755 |
# ==============================================
|
| 756 |
|
| 757 |
+
# Mount Gradio app to FastAPI
|
| 758 |
+
app = gr.mount_gradio_app(fastapi_app, gradio_app, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
|
| 760 |
# ==============================================
|
| 761 |
# APPLICATION STARTUP
|
|
|
|
| 770 |
print(f"🔧 Default OCR Model: {Config.DEFAULT_MODEL}")
|
| 771 |
print("="*60 + "\n")
|
| 772 |
|
|
|
|
|
|
|
|
|
|
| 773 |
uvicorn.run(
|
| 774 |
app,
|
| 775 |
host="0.0.0.0",
|