Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from Kanun_Patrika_Scraper_For_HFSpaces import LegalCaseScraper | |
| import os | |
| import shutil | |
| import zipfile | |
def run_scraper(mudda_type, nepali_year, progress=gr.Progress()):
    """Scrape cases for one mudda type and Nepali year and report the results.

    Args:
        mudda_type: Case category selected in the UI; forwarded to
            ``LegalCaseScraper.run_scraper`` as ``mudda_type``.
        nepali_year: Year text entered in the UI; surrounding whitespace is
            stripped before it is forwarded as ``sal``.
        progress: Gradio progress tracker, called with a fraction and a
            description. NOTE(review): kept as a default argument because
            Gradio is understood to inject the live tracker that way --
            confirm against the Gradio version in use.

    Returns:
        A ``(status, db_path, html_zip_path)`` tuple. ``status`` is a
        human-readable message. Each path is ``None`` when the corresponding
        file does not exist, and both are ``None`` on any error.
    """
    db_path = "legal_cases.db"
    html_folder = "scraped_html"
    html_zip_path = "scraped_html.zip"

    # Treat a whitespace-only year the same as an empty one.
    if isinstance(nepali_year, str):
        nepali_year = nepali_year.strip()

    # Validate before constructing the scraper so bad input acquires nothing.
    if not mudda_type or not nepali_year:
        return "Error: Please select a mudda type and enter a Nepali year.", None, None

    # Bound before the try so the finally clause can tell whether the
    # constructor succeeded; otherwise a constructor failure would surface as
    # an UnboundLocalError from the cleanup instead of the real error.
    scraper = None
    try:
        scraper = LegalCaseScraper(output_db=db_path, html_folder=html_folder)

        progress(0.1, desc="Starting scraper...")
        scraper.run_scraper(mudda_type=mudda_type, sal=nepali_year, use_saved=True)

        status = f"Scraping completed for mudda_type: {mudda_type}, year: {nepali_year}. Data saved to SQLite database.\n"

        # Verify database file
        if os.path.exists(db_path):
            status += f"Database file found at: {db_path}\n"
        else:
            status += "Warning: Database file (legal_cases.db) not found.\n"

        # Check for at least one entry, closing the directory iterator
        # deterministically (any() stops early and would leave it open).
        has_html = False
        if os.path.exists(html_folder):
            with os.scandir(html_folder) as entries:
                has_html = any(entries)

        # Create zip of scraped_html folder
        if has_html:
            shutil.make_archive(html_folder, 'zip', html_folder)
            if os.path.exists(html_zip_path):
                status += f"HTML files zipped at: {html_zip_path}"
            else:
                status += "Warning: Failed to create scraped_html.zip."
        else:
            status += "Warning: No HTML files found in scraped_html folder."

        progress(1.0, desc="Scraping completed!")
        return (
            status,
            db_path if os.path.exists(db_path) else None,
            html_zip_path if os.path.exists(html_zip_path) else None,
        )
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error: {str(e)}", None, None
    finally:
        if scraper is not None:
            scraper.close()
# Gradio Blocks UI: two inputs, a trigger button, a status box and two
# download slots, all wired to run_scraper.
# NOTE(review): the original indentation was lost, so the extent of the Row
# is an assumption -- it is taken to hold only the two input widgets. Confirm
# against the deployed layout.
_MUDDA_TYPE_CHOICES = [
    "दुनियाबादी देवानी",
    "सरकारबादी देवानी",
    "दुनियावादी फौजदारी",
    "सरकारवादी फौजदारी",
    "रिट",
    "निवेदन",
    "विविध",
]

with gr.Blocks(title="Nepal Kanoon Patrika Scraper") as demo:
    gr.Markdown("# Nepal Kanoon Patrika Scraper")
    gr.Markdown(
        "Scrape legal case data from Nepal Kanoon Patrika website. "
        "Select a mudda type and enter a Nepali year to begin."
    )

    with gr.Row():
        mudda_type_input = gr.Dropdown(
            choices=_MUDDA_TYPE_CHOICES,
            label="Mudda Type",
            info="Select the type of legal case",
        )
        year_input = gr.Textbox(
            label="Nepali Year",
            placeholder="e.g., २०७३",
            max_lines=1,
        )

    start_button = gr.Button("Run Scraper")
    status_box = gr.Textbox(label="Status", interactive=False)
    db_file = gr.File(label="Download SQLite Database")
    html_zip_file = gr.File(label="Download Scraped HTML (Zipped)")

    # run_scraper returns (status, db path, zip path), matching this order.
    scraper_outputs = [status_box, db_file, html_zip_file]
    start_button.click(
        fn=run_scraper,
        inputs=[mudda_type_input, year_input],
        outputs=scraper_outputs,
    )

# Start serving the interface.
demo.launch()