import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import sys
import argparse
from urllib.parse import urlencode
import sqlite3
import json
import re
from pathlib import Path
import glob
import nepali_datetime

class LegalCaseScraper:
    def __init__(self, output_db="legal_cases_2.db", html_folder="scraped_html"):
        self.mudda_type_arr = [
            "दुनियाबादी देवानी", 
            "सरकारबादी देवानी", 
            "दुनियावादी फौजदारी", 
            "सरकारवादी फौजदारी", 
            "रिट", 
            "निवेदन", 
            "विविध"
        ]
        self.successful_entries = 0
        self.not_entered_links = []
        self.still_not_entered_links = []
        self.output_db = output_db
        self.html_folder = html_folder
        
        # Create HTML folder if it doesn't exist
        os.makedirs(self.html_folder, exist_ok=True)
        
        # Initialize SQLite database
        self.conn = sqlite3.connect(self.output_db)
        self.create_tables()

    def create_tables(self):
        """Create SQLite tables for scraped data and failed links"""
        cursor = self.conn.cursor()
        
        # Table for scraped case data
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS cases (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                लिङ्क TEXT UNIQUE,
                निर्णय_नं TEXT,
                भाग TEXT,
                मुद्दाको_किसिम TEXT,
                साल TEXT,
                महिना TEXT,
                अंक TEXT,
                फैसला_मिति TEXT,
                अदालत_वा_इजलास TEXT,
                न्यायाधीश TEXT,
                आदेश_मिति TEXT,
                केस_नम्बर TEXT,
                विषय TEXT,
                निवेदक TEXT,
                विपक्षी TEXT,
                प्रकरण TEXT,
                ठहर TEXT,
                html_file_path TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Table for failed links
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS failed_links (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                मुद्दाको_किसिम TEXT,
                साल TEXT,
                लिङ्क TEXT,
                error_message TEXT,
                retry_count INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        self.conn.commit()

    def get_mudda_type_number(self, mudda_type):
        """Get mudda type number (1-7) from mudda type name"""
        try:
            return str(self.mudda_type_arr.index(mudda_type) + 1)
        except ValueError:
            raise ValueError(f"Invalid mudda_type: {mudda_type}. Must be one of {self.mudda_type_arr}")

    def extract_link_number(self, url):
        """Extract the number at the end of the URL"""
        match = re.search(r'/(\d+)/?$', url)
        return match.group(1) if match else "unknown"

    def generate_html_filename(self, url, mudda_type, sal):
        """Generate standardized HTML filename: mudda_number_year_link_number.html"""
        mudda_number = self.get_mudda_type_number(mudda_type)
        english_sal = self.nepali_sal_to_english_sal(sal)
        link_number = self.extract_link_number(url)
        return f"{mudda_number}_{english_sal}_{link_number}.html"

    def nepali_sal_to_english_sal(self, sal):
        """Convert Nepali numerals to English numerals"""
        if not sal:
            return ""
        
        nepali_to_english = {
            '०': '0', '१': '1', '२': '2', '३': '3', '४': '4',
            '५': '5', '६': '6', '७': '7', '८': '8', '९': '9'
        }
        try:
            return ''.join(nepali_to_english.get(char, char) for char in str(sal))
        except (TypeError, AttributeError):
            raise ValueError(f"Input must be a string containing Nepali numerals, got: {type(sal)}")
    
    def search_url(self, mudda_type, sal):
        """Generate search URL based on mudda_type and sal"""
        mudda_types = {name: str(idx + 1) for idx, name in enumerate(self.mudda_type_arr)}
        
        if mudda_type not in mudda_types:
            raise ValueError(f"Invalid mudda_type: {mudda_type}. Must be one of {self.mudda_type_arr}")
        
        english_sal = self.nepali_sal_to_english_sal(sal)
        base_url = "https://nkp.gov.np/"
        params = {
            "mudda_number": "",
            "faisala_date_from": "",
            "faisala_date_to": "",
            "mudda_type": mudda_types[mudda_type],
            "mudda_name": "",
            "badi": "",
            "pratibadi": "",
            "judge": "",
            "ijlas_type": "",
            "nirnaya_number": "",
            "faisala_type": "",
            "keywords": "",
            "edition": "",
            "year": english_sal,
            "month": "",
            "volume": "",
            "Submit": "खोज्‍नुहोस्"
        }
        return f"{base_url}?{urlencode(params)}#"
    
    def save_html_file(self, url, html_content, mudda_type, sal):
        """Save HTML content to file with standardized naming"""
        filename = self.generate_html_filename(url, mudda_type, sal)
        filepath = os.path.join(self.html_folder, filename)
        
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(html_content)
        
        return filepath

    def load_html_file(self, url, mudda_type, sal):
        """Load HTML content from existing file"""
        filename = self.generate_html_filename(url, mudda_type, sal)
        filepath = os.path.join(self.html_folder, filename)
        
        if os.path.exists(filepath):
            with open(filepath, "r", encoding="utf-8") as f:
                return f.read()
        return None

    def return_soup(self, url, mudda_type=None, sal=None, use_saved=True, max_retries=3):
        """Get soup object from URL or saved HTML file"""
        # Try to load from saved file first if requested
        if use_saved and mudda_type and sal:
            html_content = self.load_html_file(url, mudda_type, sal)
            if html_content:
                print(f"Using saved HTML file for {url}")
                return BeautifulSoup(html_content, 'html.parser')
        
        # Download from web if not found in saved files or use_saved is False
        for attempt in range(max_retries):
            try:
                r = requests.get(url, timeout=30, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                if r.status_code == 200:
                    r.encoding = 'utf-8'
                    
                    # Save HTML file if mudda_type and sal are provided
                    if mudda_type and sal:
                        filepath = self.save_html_file(url, r.text, mudda_type, sal)
                        print(f"Saved HTML to: {filepath}")
                    
                    return BeautifulSoup(r.text, 'html.parser')
                else:
                    print(f"Attempt {attempt + 1}: Failed to retrieve {url}. Status code: {r.status_code}")
                    if attempt < max_retries - 1:
                        time.sleep(2 ** attempt)
                    
            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1}: Error scraping {url}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
                    
        return None

    def get_saved_html_files_by_criteria(self, mudda_type=None, sal=None):
        """Get list of saved HTML files matching criteria"""
        pattern = "*"
        
        if mudda_type and sal:
            mudda_number = self.get_mudda_type_number(mudda_type)
            english_sal = self.nepali_sal_to_english_sal(sal)
            pattern = f"{mudda_number}_{english_sal}_*.html"
        elif sal:
            english_sal = self.nepali_sal_to_english_sal(sal)
            pattern = f"*_{english_sal}_*.html"
        elif mudda_type:
            mudda_number = self.get_mudda_type_number(mudda_type)
            pattern = f"{mudda_number}_*_*.html"
        
        search_path = os.path.join(self.html_folder, pattern)
        return glob.glob(search_path)

    def extract_info_from_filename(self, filename):
        """Extract mudda_type, sal, and link_number from filename"""
        basename = os.path.basename(filename)
        match = re.match(r'(\d+)_(\d+)_(\d+)\.html', basename)
        
        if match:
            mudda_number, sal, link_number = match.groups()
            mudda_type = self.mudda_type_arr[int(mudda_number) - 1]
            return mudda_type, sal, link_number
        
        return None, None, None

    def from_each_page(self, links):
        """Extract unique case links from page links"""
        li = []
        flag = False
        i = 0
        while(i < len(links)):
            href = links[i].get('href')
            if href and "#" in href:
                i+=1
                if i < len(links):
                    temp_href = links[i].get('href')
                    if temp_href:
                        li.append(temp_href)
            else:
                i+=1
        unique_list = []
        if(len(li) > 1):
            unique_list = list(dict.fromkeys(li))
        return unique_list
    
    def get_all_pages(self, initial_url, mudda_type=None, sal=None, use_saved=True):
        """Get all page URLs for pagination"""
        soup = self.return_soup(initial_url, mudda_type, sal, use_saved)
        if not soup:
            return []
            
        links = soup.find_all('a')
        all_links = []
        other_pages = []
        
        for link in links:
            href = link.get('href')
            if href:
                all_links.append(href)
                if "https://nkp.gov.np/advance_search/" in href:
                    other_pages.append(href)
        
        unique_list = self.from_each_page(links)
        
        # Handle pagination
        if "javascript:void(0)" in all_links and other_pages:
            mx = 0
            for j in other_pages:
                temp = ""
                for i in range(len(j)-1, -1, -1):
                    if j[i] == "=":
                        break
                    temp = j[i] + temp
                try:
                    temp2 = int(temp)
                    if mx < temp2:
                        mx = temp2
                except ValueError:
                    continue
            if mx > 0:
                st = other_pages[0][:-2]
                real_other_pages = []
                for i in range(20, mx + 1, 20):
                    real_other_pages.append(st + str(i))
                
                unique_list2 = []
                for page_url in real_other_pages:
                    print(f"Processing page: {page_url}")
                    try:
                        page_soup = self.return_soup(page_url, mudda_type, sal, use_saved)
                        if page_soup:
                            page_links = page_soup.find_all('a')
                            unique_list2 += self.from_each_page(page_links)
                    except Exception as e:
                        print(f"Error scraping page {page_url}: {e}")
                
                unique_list += unique_list2
        
        # Remove duplicates
        unique_unique_list = list(dict.fromkeys(unique_list))
        return unique_unique_list
    
    def get_edition_field(self, soup, label):
        """Extract edition field from soup"""
        edition_info = soup.find("div", id="edition-info")
        if edition_info:
            for span in edition_info.find_all("span"):
                if label in span.text:
                    strong = span.find("strong")
                    return strong.text.strip() if strong else None
        return None

    def determine_scraper_method(self, sal):
        """Determine which scraper method to use based on year"""
        eng_sal = int(self.nepali_sal_to_english_sal(sal))

        today = nepali_datetime.date.today()
        latest_nepali_year = int(today.year)
        
        if 2015 <= eng_sal <= 2044:
            return self.scrape_case_details_2015_to_2044
        elif 2045 <= eng_sal <= 2050:
            return self.scrape_case_details_2045_to_2050
        elif 2051 <= eng_sal <= 2061:
            return self.scrape_case_details_2051_to_2061
        elif 2062 <= eng_sal <= 2072:
            return self.scrape_case_details_2062_to_2072
        elif 2073 <= eng_sal < latest_nepali_year:
            return self.scrape_case_details_2073_to_2080_and_beyond
        else:
            raise ValueError(f"No scraper method available for year {eng_sal} or those records not yet available in Nepal Kanun Patrika Website")

    def scrape_case_details_generic(self, url, mudda_type, sal, use_saved=True):
        """Generic method that routes to the appropriate scraper based on year"""
        try:
            scraper_method = self.determine_scraper_method(sal)
            return scraper_method(url, mudda_type, sal, use_saved)
        except ValueError as e:
            print(f"Error: {e}")
            return False

    # [Previous scraper methods with modifications for HTML file handling]
    def scrape_case_details_2015_to_2044(self, url, mudda_type, sal=None, use_saved=True):
        """Scrape details from a single case URL (2015-2044)"""
        try:
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            
            # Extract basic information
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"
            
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                
                # Extract court information
                temp_ijlash = ""
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            if "निर्णय नं." not in temp_ijlash:
                                details["इजलास"] = temp_ijlash
                            ind+=1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["इजलास"] = text
                            ind+=1
                            text_2 = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text_2 for kw in KEYWORDS_8) == False:
                                details["इजलास"] = text +" "+ text_2
                                ind+=1
                            break
                        elif any(kw in text for kw in KEYWORDS_8):
                            if "निर्णय नं." not in temp_ijlash:
                                details["इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_8):
                            judges.append(text)
                        else:
                            details["न्यायाधीश"] = judges
                            if any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                                ind+=1
                            elif any(kw2 in text for kw2 in KEYWORDS_3) == False and any(kw2 in text for kw2 in KEYWORDS_5) == False:
                                details["केस_नम्बर"] = text
                                ind+=1
                            break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Standard case structure      
                bisaya_before_niweduck = False
                temp_ind_64 = ind

                while temp_ind_64 < n:
                    text = tags[temp_ind_64].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3) or any(kw in text for kw in KEYWORDS_4):
                        break
                    if any(kw in text for kw in KEYWORDS_5):
                        bisaya_before_niweduck = True
                        break
                    temp_ind_64+=1
                
                if bisaya_before_niweduck:    
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind+=1
                            break
                        if any(kw in text for kw in KEYWORDS_3):
                            ind = temp_ind_32
                            break
                        ind+=1
                else:
                    while ind  < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["आदेश मिति"] = text
                            ind+=1
                            break
                        if any(kw in text for kw in KEYWORDS_3):
                            ind = temp_ind_32
                            break
                        ind+=1
                
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                        
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind+=1
                        break
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind+=1
                        break
                    ind+=1
                    
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                if bisaya_before_niweduck==False:    
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind+=1
                            break
                        if any(kw in text for kw in KEYWORDS_2):
                            ind = temp_ind_32
                            break
                        ind+=1
                else:
                    while ind  < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["आदेश मिति"] = text
                            ind+=1
                            break
                        if any(kw in text for kw in KEYWORDS_2):
                            ind = temp_ind_32
                            break
                        ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False

                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        #if "§" in text or any(kw in text for kw in KEYWORDS_2):
                            #prakarans.append(text)
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # IMPROVEMENT 23: Only append if prev has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)

                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # IMPROVEMENT 24: Only append if prev has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                        
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar
            
            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            
            # Combine all data
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
            
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False

    def scrape_case_details_2045_to_2050(self, url, mudda_type, sal = None, use_saved=True):  # CHANGE 4: Remove output_db parameter
        """Scrape details from a single case URL"""
        try:
            # IMPROVEMENT 16: Check if URL already exists in database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True

            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            
            # Extract basic information
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"  # IMPROVEMENT 19: Bounds checking
            
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:  # IMPROVEMENT 20: Better error handling for date extraction
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                
                # Extract court information
                temp_ijlash = ""
                while(ind < n):
                    #text = p_tags[ind].get_text(strip=True)
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            details["इजलास"] = temp_ijlash
                            ind+=1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["इजलास"] = text
                            ind+=1
                            break
                        elif "न्यायाधीश" in text or "माननीय" in text:
                            details["इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if "न्यायाधीश" in text or "माननीय" in text:
                            judges.append(text)
                        elif any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["न्यायाधीश"] = judges
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        else:
                            details["केस_नम्बर"] = text
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Standard case structure    
                bisaya_before_niweduck = False
                details["विषय"] = ""

                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3) or any(kw in text for kw in KEYWORDS_4):
                        break
                    if any(kw in text for kw in KEYWORDS_5):
                        bisaya_before_niweduck = True
                        break
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                if bisaya_before_niweduck:    
                    while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind+=1
                            break
                        ind+=1
                
                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                        
                    #temp_Ind = ind
                while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind+=1
                        break
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind+=1
                        break
                    ind+=1
                    
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                if bisaya_before_niweduck==False:    
                    while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind+=1
                            break
                        ind+=1
                
                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                
                # Extract prakarans and tahar
                prakarans = []
                prev = ""


                tahar = []
                temp_flag_tahar = False

                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        #if "§" in text or any(kw in text for kw in KEYWORDS_2):
                            #prakarans.append(text)
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # IMPROVEMENT 23: Only append if prev has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)

                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # IMPROVEMENT 24: Only append if prev has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar

            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            
            # Combine all data, handling lists and strings appropriately
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",  # IMPROVEMENT 25: Handle None values
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
            
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False

    def scrape_case_details_2051_to_2061(self, url, mudda_type, sal = None, use_saved=True):  # CHANGE 4: Remove output_db parameter
        """Scrape details from a single case URL"""
        try:
            # IMPROVEMENT 16: Check if URL already exists in database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True

            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            
            # Extract basic information
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"  # IMPROVEMENT 19: Bounds checking
            
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:  # IMPROVEMENT 20: Better error handling for date extraction
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                
                # Extract court information
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text and ("इजलास" in text or "इजालास" in text):
                        details["इजलास"] = text
                        ind += 1
                        break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if "न्यायाधीश" in text or "माननीय" in text:
                            judges.append(text)
                        elif any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["न्यायाधीश"] = judges
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        else:
                            details["केस_नम्बर"] = text
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Standard case structure
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if "विषय" in text or "मुद्दा" in text or "बिषय" in text or "मूद्दाः" in text:
                        details["विषय"] = text
                        ind += 1
                        break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                    
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            if ind < n:  # IMPROVEMENT 21: Bounds checking
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind += 1
                        break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                    
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            if ind < n:  # IMPROVEMENT 22: Bounds checking
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind += 1
                        break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract prakarans and tahar
                prakarans = []
                prev = ""


                tahar = []
                temp_flag_tahar = False

                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        #if "§" in text or any(kw in text for kw in KEYWORDS_2):
                            #prakarans.append(text)
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # IMPROVEMENT 23: Only append if prev has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)

                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # IMPROVEMENT 24: Only append if prev has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar

            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            
            # Combine all data, handling lists and strings appropriately
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",  # IMPROVEMENT 25: Handle None values
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
            
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False
    
    def scrape_case_details_2062_to_2072(self, url, mudda_type, sal = None, use_saved=True):  # CHANGE 4: Remove output_db parameter
        """Scrape details from a single case URL"""
        try:
            # IMPROVEMENT 16: Check if URL already exists in database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            
            # Extract basic information
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"  # IMPROVEMENT 19: Bounds checking
            
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:  # IMPROVEMENT 20: Better error handling for date extraction
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                
                # Extract court information
                temp_ijlash = ""
                while(ind < n):
                    #text = p_tags[ind].get_text(strip=True)
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            if "निर्णय नं." not in temp_ijlash:
                                details["अदालत / इजलास"] = temp_ijlash
                            ind+=1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["अदालत / इजलास"] = text
                            ind+=1
                            text_2 = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text_2 for kw in KEYWORDS_8) == False:
                                details["अदालत / इजलास"] = text +" "+ text_2
                                ind+=1
                            break
                        elif any(kw in text for kw in KEYWORDS_8):
                            if "निर्णय नं." not in temp_ijlash:
                                details["अदालत / इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract judges
                judges = []
                faisla_miti_before_case_no = False
                subject_before_case_no = False
                while(ind < n):
                    #text = p_tags[ind].get_text(strip=True)
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_8):
                            judges.append(text)
                        else:
                            details["न्यायाधीश"] = judges
                            if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                                details["आदेश मिति"] = text
                                ind+=1
                                faisla_miti_before_case_no = True
                            elif any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                            elif any(kw2 in text for kw2 in KEYWORDS_3) == False and any(kw2 in text for kw2 in KEYWORDS_5) == False:
                                if text!="फैसला":
                                    details["केस_नम्बर"] = text
                                else:
                                    ind+=1
                                    details["केस_नम्बर"] = tags[ind].get_text(separator=' ', strip=True)
                                ind+=1
                            break
                    ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                # Standard case structure

                if faisla_miti_before_case_no:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                            elif any(text.startswith(kw) for kw in KEYWORDS_5):
                                subject_before_case_no = True
                                details["विषय"] = text
                            else:
                                details["केस_नम्बर"] = text
                            ind+=1
                            break
                        ind+=1
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                                details["आदेश मिति"] = text
                                ind+=1
                                break
                            if any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                ind = temp_ind_32
                                break
                        ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind

                if subject_before_case_no:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            details["केस_नम्बर"] = text
                            ind+=1
                            break
                        ind+=1
                else: 
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(text.startswith(kw) for kw in KEYWORDS_5):
                                details["विषय"] = text
                                ind+=1
                                break
                            if any(kw in text for kw in KEYWORDS_3):
                                ind = temp_ind_32
                                break
                        ind+=1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                temp_ind_64 = ind
                count_how_many = 0
                
                while temp_ind_64 < n:
                    text = tags[temp_ind_64].get_text(separator=' ', strip=True)
                    if text and any(kw == text for kw in KEYWORDS_9):
                        count_how_many += 1
                    if any(text.startswith(kw) for kw in KEYWORDS_2):
                        break
                    temp_ind_64+=1


                if count_how_many > 1:
                    case_no = []
                    appellant = []
                    opposition = []
                    while count_how_many > 0:
                        while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                            text = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text for kw in KEYWORDS_3):
                                if any(kw2 == text for kw2 in KEYWORDS_3):
                                    ind += 1
                                    text = tags[ind].get_text(separator=' ', strip=True)
                                appellant.append(text)
                                ind+=1
                                break
                            ind+=1
                    
                        if ind >= n:
                            ind = temp_ind_32
                        else:
                            temp_ind_32 = ind
                    
                        while ind < n:
                                #text = p_tags[ind].get_text(strip=True)
                            text = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text for kw in KEYWORDS_4):
                                if any(kw2 == text for kw2 in KEYWORDS_4):
                                    ind += 1
                                    text = tags[ind].get_text(separator=' ', strip=True)
                                opposition.append(text)
                                ind+=1
                                break
                            ind+=1
                            
                        if ind >= n:
                            ind = temp_ind_32
                        else:
                            temp_ind_32 = ind

                        count_how_many-=1
                        
                    temp_ind_128 = 0
                    
                    while temp_ind_128 < n:
                        text = tags[temp_ind_128].get_text(separator=' ', strip=True)
                        if text:
                            if any(kw in text for kw in KEYWORDS_10):
                                case_no.append(text)
                            if any(text.startswith(kw) for kw in KEYWORDS_2) or any(kw == text for kw in KEYWORDS_7):
                                break
                        temp_ind_128 += 1

                    details["केस_नम्बर"] = case_no
                    details["निवेदक"] = appellant
                    details["विपक्षी"] = opposition

                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_3):
                            if any(kw2 == text for kw2 in KEYWORDS_3):
                                ind += 1
                                text = tags[ind].get_text(separator=' ', strip=True)
                            details["निवेदक"] = text
                            ind+=1
                            break
                        ind+=1 

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind

                    while ind < n:
                        #text = p_tags[ind].get_text(strip=True)
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_4):
                            if any(kw2 == text for kw2 in KEYWORDS_4):
                                ind += 1
                                text = tags[ind].get_text(separator=' ', strip=True)
                            details["विपक्षी"] = text
                            ind+=1
                            break
                        ind+=1

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                
                
                # Extract prakarans and tahar
                prakarans = []
                prev = ""


                tahar = []
                temp_flag_tahar = False

                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        #if "§" in text or any(kw in text for kw in KEYWORDS_2):
                            #prakarans.append(text)
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # IMPROVEMENT 23: Only append if prev has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)

                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # IMPROVEMENT 24: Only append if prev has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar

            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            
            # Combine all data, handling lists and strings appropriately
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",  # IMPROVEMENT 25: Handle None values
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("अदालत / इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
            
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False

    def scrape_case_details_2073_to_2080_and_beyond(self, url, mudda_type, sal = None, use_saved=True):
        """Scrape details from a single case URL"""
        try:
            r = requests.get(url, timeout=15)
            if r.status_code != 200:
                print(f"Failed to retrieve {url}, Status code: {r.status_code}")
                return False

            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            
            # Extract basic information
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag else "N/A"
            
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
            
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                #KEYWORDS_2 = ["(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र.नं."]

                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                # Extract court information
                while ind < n:
                    text = tags[ind].get_text(strip=True)
                    if text and any(kw in text for kw in KEYWORDS_6):
                        details["अदालत / इजलास"] = text
                        ind += 1
                        break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_8):
                            judges.append(text)
                        if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["न्यायाधीश"] = judges
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                    ind += 1

                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Extract case details
                bisaya_before_kas_no = False
                while ind < n:
                    text = tags[ind].get_text(strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_5):
                            bisaya_before_kas_no = True
                            details["विषय"] = text
                            ind += 1
                            break
                        details["केस_नम्बर"] = text
                        break
                    ind += 1

                if ind > n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                
                # Handle different case structures
                if bisaya_before_kas_no:
                    case_no = []
                    appellant = []
                    opposition = []
                    temp_flag = True
                    
                    while temp_flag and ind < n:
                        # Extract case number
                        while ind < n:
                            text = tags[ind].get_text(strip=True)
                            if text:
                                case_no.append(text)
                                ind += 1
                                break
                            ind += 1
                        
                        # Extract appellant
                        while ind < n:
                            text = tags[ind].get_text(strip=True)
                            if any(kw in text for kw in KEYWORDS_3):
                                if any(kw2 == text for kw2 in KEYWORDS_3):
                                    ind+=1
                                    text = tags[ind].get_text(strip=True)
                                appellant.append(text)
                                ind += 1
                                break
                            ind += 1
                        
                        # Extract opposition
                        while ind < n:
                            text = tags[ind].get_text(strip=True)
                            if any(kw in text for kw in KEYWORDS_4):
                                if any(kw2 == text for kw2 in KEYWORDS_4):
                                    ind += 1
                                    text = tags[ind].get_text(strip=True)
                                opposition.append(text)
                                ind += 1
                                break
                            ind += 1
                        
                        # Check for end condition
                        temp_ind = ind
                        for tag in tags[temp_ind:]:
                            text = tag.get_text(strip=True)
                            if any(kw in text for kw in KEYWORDS_2):
                                temp_flag = False
                                details["केस_नम्बर"] = case_no
                                details["निवेदक"] = appellant
                                details["विपक्षी"] = opposition
                                break
                            elif any(kw == text for kw in KEYWORDS_9):
                                break

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                else:
                    # Standard case structure
                    while ind < n:
                        text = tags[ind].get_text(strip=True)
                        if any(kw in text for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind += 1
                            break
                        ind += 1

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                    
                    while ind < n:
                        text = tags[ind].get_text(strip=True)
                        if any(kw in text for kw in KEYWORDS_3):
                            if any(kw2 == text for kw2 in KEYWORDS_3):
                                ind+=1
                                text = tags[ind].get_text(strip=True)
                            details["निवेदक"] = text
                            ind += 1
                            break
                        ind += 1

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                    
                    while ind < n:
                        text = tags[ind].get_text(strip=True)
                        if any(kw in text for kw in KEYWORDS_4):
                            if any(kw2 == text for kw2 in KEYWORDS_4):
                                ind+=1
                                text = tags[ind].get_text(strip=True)
                            details["विपक्षी"] = text
                            ind += 1
                            break
                        ind += 1

                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                
                # Clean up extracted text
                # self.clean_extracted_details(details, bisaya_before_kas_no)
                
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False

                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        #if "§" in text or any(kw in text for kw in KEYWORDS_2):
                            #prakarans.append(text)
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # IMPROVEMENT 23: Only append if prev has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if "फैसला"==text or "आदेश"==text or "फैसलाः"==text:
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)

                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # IMPROVEMENT 24: Only append if prev has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar

            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            
            # Combine all data, handling lists and strings appropriately
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag,
                "मुद्दाको किसिम": mudda_type,
                "साल": saal,
                "महिना": mahina,
                "अंक": anka,
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("अदालत / इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
            
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False

    def save_to_sqlite(self, data):
        """Save data to SQLite database"""
        cursor = self.conn.cursor()
        
        try:
            cursor.execute('''
                INSERT OR REPLACE INTO cases (
                    लिङ्क, निर्णय_नं, भाग, मुद्दाको_किसिम, साल, महिना, अंक, फैसला_मिति,
                    अदालत_वा_इजलास, न्यायाधीश, आदेश_मिति, केस_नम्बर, विषय, निवेदक, विपक्षी, 
                    प्रकरण, ठहर, html_file_path
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                data["लिङ्क"], data["निर्णय नं."], data["भाग"], data["मुद्दाको किसिम"],
                data["साल"], data["महिना"], data["अंक"], data["फैसला मिति"],
                data["अदालत / इजलास"], data["न्यायाधीश"], data["आदेश मिति"], data["केस_नम्बर"],
                data["विषय"], data["निवेदक"], data["विपक्षी"], data["प्रकरण"], data["ठहर"],
                data["html_file_path"]
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            print(f"Database error: {e}")
            raise
    
    def save_failed_links(self, failed_links, mudda_type, sal, error_msg="Unknown error"):
        """Save failed links to SQLite database"""
        if failed_links:
            cursor = self.conn.cursor()
            for link in failed_links:
                try:
                    cursor.execute('''
                        INSERT INTO failed_links (मुद्दाको_किसिम, साल, लिङ्क, error_message, retry_count)
                        VALUES (?, ?, ?, ?, ?)
                    ''', (mudda_type, sal, link, error_msg, 1))
                except sqlite3.Error as e:
                    print(f"Error saving failed link {link}: {e}")
            try:
                self.conn.commit()
            except sqlite3.Error as e:
                print(f"Error committing failed links: {e}")

    def test_single_link(self, url, mudda_type=None, sal=None, use_saved=True):
        """Test scraping a single link"""
        print(f"Testing single link: {url}")
        
        # If mudda_type and sal not provided, try to extract from existing data or filename
        if not mudda_type or not sal:
            cursor = self.conn.cursor()
            cursor.execute('SELECT मुद्दाको_किसिम, साल FROM cases WHERE लिङ्क = ?', (url,))
            result = cursor.fetchone()
            if result:
                mudda_type, sal = result
                print(f"Found existing data: mudda_type={mudda_type}, sal={sal}")
        
        if not mudda_type or not sal:
            print("Warning: mudda_type and sal not provided and couldn't be determined from existing data")
            print("Using generic scraping without HTML file management")
        
        success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved)
        if success:
            print("✓ Successfully scraped and saved to database")
        else:
            print("✗ Failed to scrape")
        
        return success

    def test_saved_html_files(self, mudda_type=None, sal=None, limit=None):
        """Test scraping from saved HTML files"""
        html_files = self.get_saved_html_files_by_criteria(mudda_type, sal)
        
        if not html_files:
            print("No saved HTML files found matching criteria")
            return
        
        print(f"Found {len(html_files)} saved HTML files")
        
        if limit:
            html_files = html_files[:limit]
            print(f"Testing first {limit} files")
        
        successful_count = 0
        failed_count = 0
        
        for html_file in html_files:
            file_mudda_type, file_sal, link_number = self.extract_info_from_filename(html_file)
            
            if not file_mudda_type or not file_sal:
                print(f"Could not extract info from filename: {html_file}")
                failed_count += 1
                continue
            
            # Reconstruct URL (this is a simplified approach)
            url = f"https://nkp.gov.np/full_detail/{link_number}"
            
            print(f"Testing {html_file} -> {file_mudda_type}, {file_sal}")
            
            success = self.scrape_case_details_generic(url, file_mudda_type, file_sal, use_saved=True)
            
            if success:
                successful_count += 1
            else:
                failed_count += 1
        
        print(f"\nTest Results:")
        print(f"✓ Successful: {successful_count}")
        print(f"✗ Failed: {failed_count}")
        print(f"Total: {len(html_files)}")

    def run_scraper(self, mudda_type, sal, use_saved=True):
        """Main method to run the scraper"""
        print(f"Starting scraper for mudda_type: {mudda_type}, sal: {sal}")
        print(f"Using database: {self.output_db}")
        print(f"HTML folder: {self.html_folder}")
        print(f"Use saved HTML files: {use_saved}")
        
        # Validate inputs
        if mudda_type not in self.mudda_type_arr:
            raise ValueError(f"Invalid mudda_type. Must be one of: {self.mudda_type_arr}")
        
        # Generate search URL
        try:
            search_url = self.search_url(mudda_type, sal)
            print(f"Search URL: {search_url}")
        except Exception as e:
            print(f"Error generating search URL: {e}")
            return
        
        # Get all case URLs
        print("Fetching all case URLs...")
        case_urls = self.get_all_pages(search_url, mudda_type, sal, use_saved)
        
        if not case_urls:
            print("No case URLs found!")
            return
        
        print(f"Found {len(case_urls)} case URLs to scrape")
        
        # Scrape each case
        successful_count = 0
        failed_links = []
        
        for i, url in enumerate(case_urls, 1):
            print(f"Processing {i}/{len(case_urls)}: {url}")
            
            success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved)
            if success:
                successful_count += 1
            else:
                failed_links.append(url)
            
            # Add delay between requests only if downloading from web
            if not use_saved:
                time.sleep(2)
        
        # Retry failed links once
        if failed_links:
            print(f"\nRetrying {len(failed_links)} failed links...")
            still_failed = []
            
            for i, url in enumerate(failed_links, 1):
                print(f"Retrying {i}/{len(failed_links)}: {url}")
                
                success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved=False)  # Force web download on retry
                if success:
                    successful_count += 1
                else:
                    still_failed.append(url)
                
                time.sleep(2)
            
            # Save permanently failed links
            if still_failed:
                self.save_failed_links(still_failed, mudda_type, sal, "Failed after retry")
                
                print(f"\nFinal Results:")
                print(f"Total links found: {len(case_urls)}")
                print(f"Successfully scraped: {successful_count}")
                print(f"Failed to scrape: {len(still_failed)}")
                
                if still_failed:
                    print(f"Failed links saved to database: failed_links table")
        else:
            print(f"\nResults:")
            print(f"Total links found: {len(case_urls)}")
            print(f"Successfully scraped: {successful_count}")
        
        print(f"Scraped data saved to SQLite database: {self.output_db}")

    def close(self):
        """Explicitly close the database connection"""
        if hasattr(self, 'conn'):
            self.conn.close()

    def __del__(self):
        """Close SQLite connection when the object is destroyed"""
        self.close()


def create_parser():
    """Create command line argument parser"""
    parser = argparse.ArgumentParser(
        description="Legal Case Scraper for Nepal Kanoon Patrika",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scrape specific mudda_type and year
  python app.py --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३" --database_name "app_test_db.db"
  
  # Test a specific link
  python app.py --test_link "https://nkp.gov.np/8035" --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३"
  
  # Test saved HTML files
  python app.py --test_saved --nepali_year "२०७३" --limit 5
  
  # Use saved HTML files for scraping (faster)
  python app.py --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३" --use_saved
  
  # List available mudda types
  python app.py --list_mudda_types
        """
    )
    
    parser.add_argument('--mudda_type', type=str, 
                       help='Mudda type (e.g., "दुनियाबादी देवानी")')
    
    parser.add_argument('--nepali_year', type=str,
                       help='Nepali year (e.g., "२०७३")')
    
    parser.add_argument('--database_name', type=str, default='legal_cases_2.db',
                       help='SQLite database filename (default: legal_cases_2.db)')
    
    parser.add_argument('--html_folder', type=str, default='scraped_html',
                       help='Folder to store HTML files (default: scraped_html)')
    
    parser.add_argument('--use_saved', action='store_true',
                       help='Use saved HTML files when available (faster)')
    
    parser.add_argument('--test_link', type=str,
                       help='Test scraping a specific link')
    
    parser.add_argument('--test_saved', action='store_true',
                       help='Test scraping from saved HTML files')
    
    parser.add_argument('--limit', type=int,
                       help='Limit number of files to test (use with --test_saved)')
    
    parser.add_argument('--list_mudda_types', action='store_true',
                       help='List all available mudda types')
    
    return parser


def main():
    """Main function to run the application"""
    parser = create_parser()
    args = parser.parse_args()
    
    # List mudda types if requested
    if args.list_mudda_types:
        temp_scraper = LegalCaseScraper()
        print("Available mudda_type options:")
        for i, option in enumerate(temp_scraper.mudda_type_arr, 1):
            print(f"{i}. {option}")
        temp_scraper.close()
        return
    
    # Create the scraper
    scraper = LegalCaseScraper(
        output_db=args.database_name,
        html_folder=args.html_folder
    )
    
    try:
        # Test single link
        if args.test_link:
            success = scraper.test_single_link(
                args.test_link, 
                args.mudda_type, 
                args.nepali_year,
                use_saved=args.use_saved
            )
            return
        
        # Test saved HTML files
        if args.test_saved:
            scraper.test_saved_html_files(
                mudda_type=args.mudda_type,
                sal=args.nepali_year,
                limit=args.limit
            )
            return
        
        # Regular scraping
        if not args.mudda_type or not args.nepali_year:
            print("Error: --mudda_type and --nepali_year are required for scraping")
            print("Use --help for usage examples")
            return
        
        scraper.run_scraper(
            mudda_type=args.mudda_type,
            sal=args.nepali_year,
            use_saved=args.use_saved
        )
        
    except Exception as e:
        print(f"Error: {e}")
    finally:
        scraper.close()


if __name__ == "__main__":
    main()