Spaces:
Sleeping
Sleeping
| import subprocess | |
| import io | |
| import tempfile | |
| import gradio as gr | |
def run_katana(url, crawl_type):
    """Crawl ``url`` with the ``katana`` command-line tool and return its output.

    Args:
        url: Seed URL to crawl. Surrounding whitespace is ignored.
        crawl_type: ``"All URLs"`` runs katana with only ``-u``; any other
            value additionally passes a crawl-scope regex (``-cs``) anchored
            at ``url``, ``-depth 5`` and ``-jc``.

    Returns:
        A ``(text, buffer)`` tuple. On success ``text`` is katana's stdout and
        ``buffer`` is an ``io.StringIO`` holding the same text. On any failure
        ``text`` is an error message and ``buffer`` is ``None``.
    """
    import re  # local import: only needed to escape the scope pattern

    target = (url or "").strip()
    if not target:
        # Nothing to crawl; answer directly instead of spawning katana.
        return "Please enter a URL to crawl.", None

    if crawl_type == "All URLs":
        command = ["katana", "-u", target]
    else:  # Subkeyword URLs
        command = [
            "katana",
            "-u", target,
            # Escape the URL so characters such as "." and "?" are matched
            # literally instead of being interpreted as regex operators.
            "-cs", f"^{re.escape(target)}.*",
            "-depth", "5",
            "-jc",
        ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # check=True raises on a non-zero exit; append katana's own stderr so
        # the caller sees why the crawl failed, not just the exit status.
        detail = (e.stderr or "").strip()
        message = f"{e}\n{detail}" if detail else str(e)
        return message, None
    except Exception as e:
        # Boundary handler (e.g. katana binary missing): the caller displays
        # the message rather than crashing.
        return str(e), None

    # In-memory file-like copy of the output for callers that want a stream.
    return result.stdout, io.StringIO(result.stdout)
| # Gradio callback: runs the crawl and saves the output to a temporary file for download | |
def process_and_display(url, crawl_type):
    """Run a crawl and return its text plus a path to a file containing it.

    Args:
        url: URL forwarded to ``run_katana``.
        crawl_type: Crawl mode forwarded to ``run_katana``.

    Returns:
        A ``(text, path)`` tuple. ``text`` is whatever ``run_katana`` returned
        as its first element. ``path`` is the name of a ``.txt`` temporary
        file holding the crawl output encoded as UTF-8, or ``None`` when
        ``run_katana`` returned no buffer.
    """
    result, file_data = run_katana(url, crawl_type)
    if file_data is None:
        return result, None

    # delete=False keeps the file on disk after it is closed, so the returned
    # path is still valid; the ``with`` block closes the handle even if the
    # write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
        temp_file.write(file_data.getvalue().encode('utf-8'))
    return result, temp_file.name
| # Gradio interface with a URL textbox and a crawl-type dropdown | |
# Input widgets, listed in the positional order of process_and_display's
# parameters (url, crawl_type).
crawl_inputs = [
    gr.Textbox(label="Enter URL"),
    gr.Dropdown(choices=["All URLs", "Subkeyword URLs"], label="Crawl Type"),
]

# Output widgets, matching the (text, file path) pair the callback returns.
crawl_outputs = [
    gr.Textbox(label="Crawl Results"),
    gr.File(label="Download Results"),
]

iface = gr.Interface(
    title="Katana Crawler",
    description="Enter a URL to crawl using Katana. Select the crawl type and results will be displayed and available for download.",
    fn=process_and_display,
    inputs=crawl_inputs,
    outputs=crawl_outputs,
    allow_flagging="never",
)

# Listen on all network interfaces, port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)