import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import sys
import argparse
from urllib.parse import urlencode
import sqlite3
import json
import re
from pathlib import Path
import glob
import nepali_datetime
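# Note: nepali_datetime is a third-party package (PyPI: nepali-datetime) used
# here to look up the current Bikram Sambat year.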

class LegalCaseScraper:
    def __init__(self, output_db="legal_cases_2.db", html_folder="scraped_html"):
        self.mudda_type_arr = [
            "दुनियाबादी देवानी",
            "सरकारबादी देवानी",
            "दुनियावादी फौजदारी",
            "सरकारवादी फौजदारी",
            "रिट",
            "निवेदन",
            "विविध"
        ]
        self.successful_entries = 0
        self.not_entered_links = []
        self.still_not_entered_links = []
        self.output_db = output_db
        self.html_folder = html_folder
        # Create HTML folder if it doesn't exist
        os.makedirs(self.html_folder, exist_ok=True)
        # Initialize SQLite database
        self.conn = sqlite3.connect(self.output_db)
        self.create_tables()
    def create_tables(self):
        """Create SQLite tables for scraped data and failed links"""
        cursor = self.conn.cursor()
        # Table for scraped case data
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS cases (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                लिङ्क TEXT UNIQUE,
                निर्णय_नं TEXT,
                भाग TEXT,
                मुद्दाको_किसिम TEXT,
                साल TEXT,
                महिना TEXT,
                अंक TEXT,
                फैसला_मिति TEXT,
                अदालत_वा_इजलास TEXT,
                न्यायाधीश TEXT,
                आदेश_मिति TEXT,
                केस_नम्बर TEXT,
                विषय TEXT,
                निवेदक TEXT,
                विपक्षी TEXT,
                प्रकरण TEXT,
                ठहर TEXT,
                html_file_path TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        # Table for failed links
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS failed_links (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                मुद्दाको_किसिम TEXT,
                साल TEXT,
                लिङ्क TEXT,
                error_message TEXT,
                retry_count INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        self.conn.commit()
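    # Note: SQLite accepts these Devanagari column names as plain identifiers,
    # so they can be queried directly, e.g. (illustrative; साल is stored as
    # Devanagari text exactly as scraped):
    #   SELECT निर्णय_नं, विषय FROM cases WHERE साल = '२०४५'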
    def get_mudda_type_number(self, mudda_type):
        """Get mudda type number (1-7) from mudda type name"""
        try:
            return str(self.mudda_type_arr.index(mudda_type) + 1)
        except ValueError:
            raise ValueError(f"Invalid mudda_type: {mudda_type}. Must be one of {self.mudda_type_arr}")

    def extract_link_number(self, url):
        """Extract the number at the end of the URL"""
        match = re.search(r'/(\d+)/?$', url)
        return match.group(1) if match else "unknown"
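    # Illustrative example (hypothetical URL): a case page at
    # https://nkp.gov.np/full_detail/9422 yields "9422"; a trailing slash is
    # tolerated by the regex, and non-matching URLs return "unknown".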
    def generate_html_filename(self, url, mudda_type, sal):
        """Generate standardized HTML filename: mudda_number_year_link_number.html"""
        mudda_number = self.get_mudda_type_number(mudda_type)
        english_sal = self.nepali_sal_to_english_sal(sal)
        link_number = self.extract_link_number(url)
        return f"{mudda_number}_{english_sal}_{link_number}.html"
    def nepali_sal_to_english_sal(self, sal):
        """Convert Nepali numerals to English numerals"""
        if not sal:
            return ""
        nepali_to_english = {
            '०': '0', '१': '1', '२': '2', '३': '3', '४': '4',
            '५': '5', '६': '6', '७': '7', '८': '8', '९': '9'
        }
        try:
            return ''.join(nepali_to_english.get(char, char) for char in str(sal))
        except (TypeError, AttributeError):
            raise ValueError(f"Input must be a string containing Nepali numerals, got: {type(sal)}")
    def search_url(self, mudda_type, sal):
        """Generate search URL based on mudda_type and sal"""
        mudda_types = {name: str(idx + 1) for idx, name in enumerate(self.mudda_type_arr)}
        if mudda_type not in mudda_types:
            raise ValueError(f"Invalid mudda_type: {mudda_type}. Must be one of {self.mudda_type_arr}")
        english_sal = self.nepali_sal_to_english_sal(sal)
        base_url = "https://nkp.gov.np/"
        params = {
            "mudda_number": "",
            "faisala_date_from": "",
            "faisala_date_to": "",
            "mudda_type": mudda_types[mudda_type],
            "mudda_name": "",
            "badi": "",
            "pratibadi": "",
            "judge": "",
            "ijlas_type": "",
            "nirnaya_number": "",
            "faisala_type": "",
            "keywords": "",
            "edition": "",
            "year": english_sal,
            "month": "",
            "volume": "",
            "Submit": "खोज्नुहोस्"
        }
        return f"{base_url}?{urlencode(params)}#"
    def save_html_file(self, url, html_content, mudda_type, sal):
        """Save HTML content to file with standardized naming"""
        filename = self.generate_html_filename(url, mudda_type, sal)
        filepath = os.path.join(self.html_folder, filename)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(html_content)
        return filepath

    def load_html_file(self, url, mudda_type, sal):
        """Load HTML content from existing file"""
        filename = self.generate_html_filename(url, mudda_type, sal)
        filepath = os.path.join(self.html_folder, filename)
        if os.path.exists(filepath):
            with open(filepath, "r", encoding="utf-8") as f:
                return f.read()
        return None
    def return_soup(self, url, mudda_type=None, sal=None, use_saved=True, max_retries=3):
        """Get soup object from URL or saved HTML file"""
        # Try to load from saved file first if requested
        if use_saved and mudda_type and sal:
            html_content = self.load_html_file(url, mudda_type, sal)
            if html_content:
                print(f"Using saved HTML file for {url}")
                return BeautifulSoup(html_content, 'html.parser')
        # Download from web if not found in saved files or use_saved is False
        for attempt in range(max_retries):
            try:
                r = requests.get(url, timeout=30, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                if r.status_code == 200:
                    r.encoding = 'utf-8'
                    # Save HTML file if mudda_type and sal are provided
                    if mudda_type and sal:
                        filepath = self.save_html_file(url, r.text, mudda_type, sal)
                        print(f"Saved HTML to: {filepath}")
                    return BeautifulSoup(r.text, 'html.parser')
                else:
                    print(f"Attempt {attempt + 1}: Failed to retrieve {url}. Status code: {r.status_code}")
                    if attempt < max_retries - 1:
                        time.sleep(2 ** attempt)
            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1}: Error scraping {url}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
        return None
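    # Minimal usage sketch: return_soup(url, "रिट", "२०४५") serves the cached
    # copy when present; failed downloads back off 2**attempt seconds between
    # retries and fall through to None after max_retries attempts.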
    def get_saved_html_files_by_criteria(self, mudda_type=None, sal=None):
        """Get list of saved HTML files matching criteria"""
        pattern = "*"
        if mudda_type and sal:
            mudda_number = self.get_mudda_type_number(mudda_type)
            english_sal = self.nepali_sal_to_english_sal(sal)
            pattern = f"{mudda_number}_{english_sal}_*.html"
        elif sal:
            english_sal = self.nepali_sal_to_english_sal(sal)
            pattern = f"*_{english_sal}_*.html"
        elif mudda_type:
            mudda_number = self.get_mudda_type_number(mudda_type)
            pattern = f"{mudda_number}_*_*.html"
        search_path = os.path.join(self.html_folder, pattern)
        return glob.glob(search_path)

    def extract_info_from_filename(self, filename):
        """Extract mudda_type, sal, and link_number from filename"""
        basename = os.path.basename(filename)
        match = re.match(r'(\d+)_(\d+)_(\d+)\.html', basename)
        if match:
            mudda_number, sal, link_number = match.groups()
            mudda_type = self.mudda_type_arr[int(mudda_number) - 1]
            return mudda_type, sal, link_number
        return None, None, None
    def from_each_page(self, links):
        """Extract unique case links from page links"""
        li = []
        i = 0
        while i < len(links):
            href = links[i].get('href')
            if href and "#" in href:
                i += 1
                if i < len(links):
                    temp_href = links[i].get('href')
                    if temp_href:
                        li.append(temp_href)
            else:
                i += 1
        # Deduplicate while preserving order
        unique_list = []
        if li:
            unique_list = list(dict.fromkeys(li))
        return unique_list
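    # Heuristic used above: each results row appears to render an in-page
    # anchor (href containing "#") immediately followed by the anchor that
    # carries the real case URL, so the method takes the link right after
    # each "#" link; rows without that pairing are skipped.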
    def get_all_pages(self, initial_url, mudda_type=None, sal=None, use_saved=True):
        """Get all page URLs for pagination"""
        soup = self.return_soup(initial_url, mudda_type, sal, use_saved)
        if not soup:
            return []
        links = soup.find_all('a')
        all_links = []
        other_pages = []
        for link in links:
            href = link.get('href')
            if href:
                all_links.append(href)
                if "https://nkp.gov.np/advance_search/" in href:
                    other_pages.append(href)
        unique_list = self.from_each_page(links)
        # Handle pagination
        if "javascript:void(0)" in all_links and other_pages:
            # Find the largest offset among the "...=<offset>" pager links
            mx = 0
            for j in other_pages:
                temp = j.rsplit("=", 1)[-1]
                try:
                    temp2 = int(temp)
                    if mx < temp2:
                        mx = temp2
                except ValueError:
                    continue
            if mx > 0:
                # Rebuild every page URL up to the maximum offset (steps of 20)
                st = other_pages[0].rsplit("=", 1)[0] + "="
                real_other_pages = []
                for i in range(20, mx + 1, 20):
                    real_other_pages.append(st + str(i))
                unique_list2 = []
                for page_url in real_other_pages:
                    print(f"Processing page: {page_url}")
                    try:
                        page_soup = self.return_soup(page_url, mudda_type, sal, use_saved)
                        if page_soup:
                            page_links = page_soup.find_all('a')
                            unique_list2 += self.from_each_page(page_links)
                    except Exception as e:
                        print(f"Error scraping page {page_url}: {e}")
                unique_list += unique_list2
        # Remove duplicates
        unique_unique_list = list(dict.fromkeys(unique_list))
        return unique_unique_list
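    # Pagination sketch: the site's pager links end in "=<offset>" with offsets
    # stepping by 20, so the largest offset found is enough to enumerate every
    # results page up front (the parameter name itself is not relied on here,
    # only the trailing "=<n>").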
    def get_edition_field(self, soup, label):
        """Extract edition field from soup"""
        edition_info = soup.find("div", id="edition-info")
        if edition_info:
            for span in edition_info.find_all("span"):
                if label in span.text:
                    strong = span.find("strong")
                    return strong.text.strip() if strong else None
        return None
    def determine_scraper_method(self, sal):
        """Determine which scraper method to use based on year"""
        eng_sal = int(self.nepali_sal_to_english_sal(sal))
        today = nepali_datetime.date.today()
        latest_nepali_year = int(today.year)
        if 2015 <= eng_sal <= 2044:
            return self.scrape_case_details_2015_to_2044
        elif 2045 <= eng_sal <= 2050:
            return self.scrape_case_details_2045_to_2050
        elif 2051 <= eng_sal <= 2061:
            return self.scrape_case_details_2051_to_2061
        elif 2062 <= eng_sal <= 2072:
            return self.scrape_case_details_2062_to_2072
        elif 2073 <= eng_sal < latest_nepali_year:
            return self.scrape_case_details_2073_to_2080_and_beyond
        else:
            raise ValueError(f"No scraper method available for year {eng_sal}, or those records are not yet available on the Nepal Kanun Patrika website")
    def scrape_case_details_generic(self, url, mudda_type, sal, use_saved=True):
        """Generic method that routes to the appropriate scraper based on year"""
        try:
            scraper_method = self.determine_scraper_method(sal)
            return scraper_method(url, mudda_type, sal, use_saved)
        except ValueError as e:
            print(f"Error: {e}")
            return False
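    # Minimal usage sketch (case_url is a placeholder for a link collected
    # via get_all_pages):
    #   scraper = LegalCaseScraper()
    #   scraper.scrape_case_details_generic(case_url, "रिट", "२०४५")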
    def scrape_case_details_2015_to_2044(self, url, mudda_type, sal=None, use_saved=True):
        """Scrape details from a single case URL (2015-2044)"""
        try:
            # Skip URLs already present in the database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            # Extract basic information (with bounds checking on the title split)
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            # Extract detailed information (the trailing space in the id appears
            # to match the site's markup verbatim)
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.", "( प्र. नं", "(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = ["विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                # Extract court information
                temp_ijlash = ""
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            if "निर्णय नं." not in temp_ijlash:
                                details["इजलास"] = temp_ijlash
                            ind += 1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["इजलास"] = text
                            ind += 1
                            if ind < n:  # bounds check before peeking at the next tag
                                text_2 = tags[ind].get_text(separator=' ', strip=True)
                                if not any(kw in text_2 for kw in KEYWORDS_8):
                                    details["इजलास"] = text + " " + text_2
                                    ind += 1
                            break
                        elif any(kw in text for kw in KEYWORDS_8):
                            if "निर्णय नं." not in temp_ijlash:
                                details["इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_8):
                            judges.append(text)
                        else:
                            details["न्यायाधीश"] = judges
                            if any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                                ind += 1
                            elif not any(kw2 in text for kw2 in KEYWORDS_3) and not any(kw2 in text for kw2 in KEYWORDS_5):
                                details["केस_नम्बर"] = text
                                ind += 1
                            break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Standard case structure
                bisaya_before_niweduck = False
                temp_ind_64 = ind
                while temp_ind_64 < n:
                    text = tags[temp_ind_64].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3) or any(kw in text for kw in KEYWORDS_4):
                        break
                    if any(kw in text for kw in KEYWORDS_5):
                        bisaya_before_niweduck = True
                        break
                    temp_ind_64 += 1
                if bisaya_before_niweduck:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind += 1
                            break
                        if any(kw in text for kw in KEYWORDS_3):
                            ind = temp_ind_32
                            break
                        ind += 1
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        if any(kw in text for kw in KEYWORDS_3):
                            ind = temp_ind_32
                            break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                if not bisaya_before_niweduck:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind += 1
                            break
                        if any(kw in text for kw in KEYWORDS_2):
                            ind = temp_ind_32
                            break
                        ind += 1
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        if any(kw in text for kw in KEYWORDS_2):
                            ind = temp_ind_32
                            break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False
                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or text in ("फैसला", "आदेश", "फैसलाः"):
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # only flush prev when it has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if text in ("फैसला", "आदेश", "फैसलाः"):
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)
                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # only flush prev when it has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar
            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            # Combine all data, handling lists and None values
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False
    def scrape_case_details_2045_to_2050(self, url, mudda_type, sal=None, use_saved=True):
        """Scrape details from a single case URL (2045-2050)"""
        try:
            # Skip URLs already present in the database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            # Extract basic information (with bounds checking on the title split)
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.", "( प्र. नं", "(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = ["विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                # Extract court information
                temp_ijlash = ""
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            details["इजलास"] = temp_ijlash
                            ind += 1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["इजलास"] = text
                            ind += 1
                            break
                        elif "न्यायाधीश" in text or "माननीय" in text:
                            details["इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if "न्यायाधीश" in text or "माननीय" in text:
                            judges.append(text)
                        elif any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["न्यायाधीश"] = judges
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        else:
                            details["केस_नम्बर"] = text
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Standard case structure
                bisaya_before_niweduck = False
                details["विषय"] = ""
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3) or any(kw in text for kw in KEYWORDS_4):
                        break
                    if any(kw in text for kw in KEYWORDS_5):
                        bisaya_before_niweduck = True
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                if bisaya_before_niweduck:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind += 1
                            break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                if not bisaya_before_niweduck:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_5):
                            details["विषय"] = text
                            ind += 1
                            break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False
                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or text in ("फैसला", "आदेश", "फैसलाः"):
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # only flush prev when it has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if text in ("फैसला", "आदेश", "फैसलाः"):
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)
                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # only flush prev when it has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar
            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            # Combine all data, handling lists and None values
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False
    def scrape_case_details_2051_to_2061(self, url, mudda_type, sal=None, use_saved=True):
        """Scrape details from a single case URL (2051-2061)"""
        try:
            # Skip URLs already present in the database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            # Extract basic information (with bounds checking on the title split)
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.", "( प्र. नं", "(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = ["विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                # Extract court information
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text and ("इजलास" in text or "इजालास" in text):
                        details["इजलास"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract judges
                judges = []
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if "न्यायाधीश" in text or "माननीय" in text:
                            judges.append(text)
                        elif any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                            details["न्यायाधीश"] = judges
                            details["आदेश मिति"] = text
                            ind += 1
                            break
                        else:
                            details["केस_नम्बर"] = text
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Standard case structure
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if "विषय" in text or "मुद्दा" in text or "बिषय" in text or "मूद्दाः" in text:
                        details["विषय"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_3):
                        if any(kw2 == text for kw2 in KEYWORDS_3):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["निवेदक"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if any(kw in text for kw in KEYWORDS_4):
                        if any(kw2 == text for kw2 in KEYWORDS_4):
                            ind += 1
                            if ind < n:  # bounds check
                                text = tags[ind].get_text(separator=' ', strip=True)
                        details["विपक्षी"] = text
                        ind += 1
                        break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False
                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or text in ("फैसला", "आदेश", "फैसलाः"):
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # only flush prev when it has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if text in ("फैसला", "आदेश", "फैसलाः"):
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)
                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # only flush prev when it has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar
            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            # Combine all data, handling lists and None values
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False
    def scrape_case_details_2062_to_2072(self, url, mudda_type, sal=None, use_saved=True):
        """Scrape details from a single case URL (2062-2072)"""
        try:
            # Skip URLs already present in the database
            cursor = self.conn.cursor()
            cursor.execute('SELECT लिङ्क FROM cases WHERE लिङ्क = ?', (url,))
            if cursor.fetchone():
                print(f"URL {url} already exists in database, skipping...")
                return True
            # Get soup using saved HTML or web
            soup = self.return_soup(url, mudda_type, sal, use_saved)
            if not soup:
                print(f"Failed to get content for {url}")
                return False
            # Extract basic information (with bounds checking on the title split)
            title_tag = soup.find("h1", class_="post-title")
            decision_title = title_tag.get_text(strip=True).split()[2] if title_tag and len(title_tag.get_text(strip=True).split()) > 2 else "N/A"
            bhaag = self.get_edition_field(soup, "भाग")
            saal = self.get_edition_field(soup, "साल")
            mahina = self.get_edition_field(soup, "महिना")
            anka = self.get_edition_field(soup, "अंक")
            # Extract decision date
            post_meta = soup.find("div", class_="post-meta")
            decision_date = "N/A"
            if post_meta and "फैसला मिति" in post_meta.text:
                try:
                    decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0]
                except IndexError:
                    decision_date = "N/A"
            # Extract detailed information
            div_tag = soup.find("div", id="faisala_detail ")
            details = {}
            if div_tag:
                tags = div_tag.find_all(['h1', 'p'])
                n = len(tags)
                ind = 0
                temp_ind_32 = ind
                KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.", "( प्र. नं", "(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."]
                KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"]
                KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"]
                KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"]
                KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"]
                KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"]
                KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"]
                KEYWORDS_9 = ["विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"]
                KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"]
                # Extract court information
                temp_ijlash = ""
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw == text for kw in KEYWORDS_6):
                            if "निर्णय नं." not in temp_ijlash:
                                details["अदालत / इजलास"] = temp_ijlash
                            ind += 1
                            break
                        elif any(kw in text for kw in KEYWORDS_6):
                            details["अदालत / इजलास"] = text
                            ind += 1
                            if ind < n:  # bounds check before peeking at the next tag
                                text_2 = tags[ind].get_text(separator=' ', strip=True)
                                if not any(kw in text_2 for kw in KEYWORDS_8):
                                    details["अदालत / इजलास"] = text + " " + text_2
                                    ind += 1
                            break
                        elif any(kw in text for kw in KEYWORDS_8):
                            if "निर्णय नं." not in temp_ijlash:
                                details["अदालत / इजलास"] = temp_ijlash
                            break
                        temp_ijlash = text
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Extract judges
                judges = []
                faisla_miti_before_case_no = False
                subject_before_case_no = False
                while ind < n:
                    text = tags[ind].get_text(separator=' ', strip=True)
                    if text:
                        if any(kw in text for kw in KEYWORDS_8):
                            judges.append(text)
                        else:
                            details["न्यायाधीश"] = judges
                            if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                                details["आदेश मिति"] = text
                                ind += 1
                                faisla_miti_before_case_no = True
                            elif any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                            elif not any(kw2 in text for kw2 in KEYWORDS_3) and not any(kw2 in text for kw2 in KEYWORDS_5):
                                if text != "फैसला":
                                    details["केस_नम्बर"] = text
                                else:
                                    ind += 1
                                    if ind < n:  # bounds check
                                        details["केस_नम्बर"] = tags[ind].get_text(separator=' ', strip=True)
                            ind += 1
                            break
                    ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Standard case structure
                if faisla_miti_before_case_no:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(kw in text for kw in KEYWORDS_10):
                                details["केस_नम्बर"] = text
                            elif any(text.startswith(kw) for kw in KEYWORDS_5):
                                subject_before_case_no = True
                                details["विषय"] = text
                            else:
                                details["केस_नम्बर"] = text
                            ind += 1
                            break
                        ind += 1
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text):
                                details["आदेश मिति"] = text
                                ind += 1
                                break
                            if any(text.startswith(kw) for kw in KEYWORDS_2) or text in ("फैसला", "आदेश", "फैसलाः"):
                                ind = temp_ind_32
                                break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                if subject_before_case_no:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            details["केस_नम्बर"] = text
                            ind += 1
                            break
                        ind += 1
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if text:
                            if any(text.startswith(kw) for kw in KEYWORDS_5):
                                details["विषय"] = text
                                ind += 1
                                break
                            if any(kw in text for kw in KEYWORDS_3):
                                ind = temp_ind_32
                                break
                        ind += 1
                if ind >= n:
                    ind = temp_ind_32
                else:
                    temp_ind_32 = ind
                # Count standalone "विरूद्ध" separators to detect consolidated
                # (multi-party) cases before the section markers begin
                temp_ind_64 = ind
                count_how_many = 0
                while temp_ind_64 < n:
                    text = tags[temp_ind_64].get_text(separator=' ', strip=True)
                    if text and any(kw == text for kw in KEYWORDS_9):
                        count_how_many += 1
                    if any(text.startswith(kw) for kw in KEYWORDS_2):
                        break
                    temp_ind_64 += 1
                if count_how_many > 1:
                    case_no = []
                    appellant = []
                    opposition = []
                    while count_how_many > 0:
                        while ind < n:
                            text = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text for kw in KEYWORDS_3):
                                if any(kw2 == text for kw2 in KEYWORDS_3):
                                    ind += 1
                                    if ind < n:  # bounds check
                                        text = tags[ind].get_text(separator=' ', strip=True)
                                appellant.append(text)
                                ind += 1
                                break
                            ind += 1
                        if ind >= n:
                            ind = temp_ind_32
                        else:
                            temp_ind_32 = ind
                        while ind < n:
                            text = tags[ind].get_text(separator=' ', strip=True)
                            if any(kw in text for kw in KEYWORDS_4):
                                if any(kw2 == text for kw2 in KEYWORDS_4):
                                    ind += 1
                                    if ind < n:  # bounds check
                                        text = tags[ind].get_text(separator=' ', strip=True)
                                opposition.append(text)
                                ind += 1
                                break
                            ind += 1
                        if ind >= n:
                            ind = temp_ind_32
                        else:
                            temp_ind_32 = ind
                        count_how_many -= 1
                    # Re-scan from the top to collect every case-number line
                    temp_ind_128 = 0
                    while temp_ind_128 < n:
                        text = tags[temp_ind_128].get_text(separator=' ', strip=True)
                        if text:
                            if any(kw in text for kw in KEYWORDS_10):
                                case_no.append(text)
                            if any(text.startswith(kw) for kw in KEYWORDS_2) or any(kw == text for kw in KEYWORDS_7):
                                break
                        temp_ind_128 += 1
                    details["केस_नम्बर"] = case_no
                    details["निवेदक"] = appellant
                    details["विपक्षी"] = opposition
                else:
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_3):
                            if any(kw2 == text for kw2 in KEYWORDS_3):
                                ind += 1
                                if ind < n:  # bounds check
                                    text = tags[ind].get_text(separator=' ', strip=True)
                            details["निवेदक"] = text
                            ind += 1
                            break
                        ind += 1
                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                    while ind < n:
                        text = tags[ind].get_text(separator=' ', strip=True)
                        if any(kw in text for kw in KEYWORDS_4):
                            if any(kw2 == text for kw2 in KEYWORDS_4):
                                ind += 1
                                if ind < n:  # bounds check
                                    text = tags[ind].get_text(separator=' ', strip=True)
                            details["विपक्षी"] = text
                            ind += 1
                            break
                        ind += 1
                    if ind >= n:
                        ind = temp_ind_32
                    else:
                        temp_ind_32 = ind
                # Extract prakarans and tahar
                prakarans = []
                prev = ""
                tahar = []
                temp_flag_tahar = False
                for tag in tags[ind:]:
                    text = tag.get_text(separator=' ', strip=True)
                    if text:
                        if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or text in ("फैसला", "आदेश", "फैसलाः"):
                            if any(text.startswith(kw) for kw in KEYWORDS_2):
                                if prev:  # only flush prev when it has content
                                    prakarans.append(prev)
                                prakarans.append(text)
                                prev = ""
                            if "§" in text:
                                prakarans.append(text)
                            if text in ("फैसला", "आदेश", "फैसलाः"):
                                if not prakarans:
                                    prakarans.append(prev)
                        else:
                            prev = prev + " " + text if prev else text
                        if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                            temp_flag_tahar = True
                            tahar.append(text)
                    # Process list items
                    next_sib = tag.find_next_sibling()
                    while next_sib and next_sib.name in ['ul', 'ol']:
                        for li in next_sib.find_all('li'):
                            li_text = li.get_text(separator=' ', strip=True)
                            if li_text:
                                if any(li_text.startswith(kw) for kw in KEYWORDS_2):
                                    if prev:  # only flush prev when it has content
                                        prakarans.append(prev)
                                    prakarans.append(li_text)
                                    prev = ""
                                else:
                                    prev = prev + " " + li_text if prev else li_text
                                if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar:
                                    temp_flag_tahar = True
                                    tahar.append(li_text)
                        next_sib = next_sib.find_next_sibling()
                details["प्रकरण"] = prakarans
                details["ठहर"] = tahar
            # Get HTML file path
            html_file_path = ""
            if mudda_type and sal:
                filename = self.generate_html_filename(url, mudda_type, sal)
                html_file_path = os.path.join(self.html_folder, filename)
            # Combine all data, handling lists and None values
            data = {
                "लिङ्क": url,
                "निर्णय नं.": decision_title,
                "भाग": bhaag or "N/A",
                "मुद्दाको किसिम": mudda_type,
                "साल": saal or "N/A",
                "महिना": mahina or "N/A",
                "अंक": anka or "N/A",
                "फैसला मिति": f"'{decision_date}'",
                "अदालत / इजलास": details.get("अदालत / इजलास", "N/A"),
                "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False),
                "आदेश मिति": details.get("आदेश मिति", "N/A"),
                "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"),
                "विषय": details.get("विषय", "N/A"),
                "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"),
                "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"),
                "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False),
                "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False),
                "html_file_path": html_file_path
            }
            # Save to SQLite
            self.save_to_sqlite(data)
            print(f"{url} - Successfully Scraped and Entered")
            return True
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return False
| def scrape_case_details_2073_to_2080_and_beyond(self, url, mudda_type, sal = None, use_saved=True): | |
| """Scrape details from a single case URL""" | |
| try: | |
| r = requests.get(url, timeout=15) | |
| if r.status_code != 200: | |
| print(f"Failed to retrieve {url}, Status code: {r.status_code}") | |
| return False | |
| # Get soup using saved HTML or web | |
| soup = self.return_soup(url, mudda_type, sal, use_saved) | |
| if not soup: | |
| print(f"Failed to get content for {url}") | |
| return False | |
| # Extract basic information | |
| title_tag = soup.find("h1", class_="post-title") | |
| decision_title = title_tag.get_text(strip=True).split()[2] if title_tag else "N/A" | |
| bhaag = self.get_edition_field(soup, "भाग") | |
| saal = self.get_edition_field(soup, "साल") | |
| mahina = self.get_edition_field(soup, "महिना") | |
| anka = self.get_edition_field(soup, "अंक") | |
| # Extract decision date | |
| post_meta = soup.find("div", class_="post-meta") | |
| decision_date = "N/A" | |
| if post_meta and "फैसला मिति" in post_meta.text: | |
| decision_date = post_meta.text.strip().split("फैसला मिति :")[-1].split("\n")[0].strip().split()[0] | |
| # Extract detailed information | |
| div_tag = soup.find("div", id="faisala_detail ") | |
| details = {} | |
| if div_tag: | |
| tags = div_tag.find_all(['h1', 'p']) | |
| n = len(tags) | |
| ind = 0 | |
| temp_ind_32 = ind | |
| #KEYWORDS_2 = ["(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र.नं."] | |
| KEYWORDS_2 = ["प्रकरण नं.", "(प्रकरण नं", "(प्रकारण नं.", "९प्रकरण नं।", "(प्रकरण", "(प्र नं.","( प्र. नं","(प्र.नं", "(प्र. नं", "( प्रकरण नं.", "( प्रकरणन", "( प्र.नं.", "( प्र . नं .", "( प ्र . नं .", "(प्ररकण नं.", "(प्रकराण नं."] | |
| KEYWORDS_3 = ["निवेदक", "वादी", "पुनरावेदक", "निबेदक", "पुनरावदेक", "निवेदिका", "निवेदीका", "निवदेक", "न ि वेदक ः", "नि वेदक ः", "पुनरावेदन", "पुनरवेदिका", "पुनरावेदिका", "पुनरावेदीका", "बादि", "पुनराबेदक", "प्रतिबादी", "पुनरावेक", "अपीलाट", "निवेदनक", "उजुरवाला", "अपिलबाट", "अपिलाट"] | |
| KEYWORDS_4 = ["विपक्षी", "प्रतिवादी", "प्रत्यर्थी", "बिपक्षी", "विपक्षी ः", "पिपक्षी", "प्रत्यार्थी", "विपक्ष", "रेस्पोण्डेण्ट", "रेस्पोन्डेन्ट", "प्रत्यथी"] | |
| KEYWORDS_5 = ["विषय", "मुद्दा", "बिषय", "मूद्दा", "मुद्द", "मद्दा", "विपक्ष", "मुद्धा", "मुद् दा"] | |
| KEYWORDS_6 = ["अदालत", "इजलास", "इजालास", "इजलाश", "बेञ्च"] | |
| KEYWORDS_7 = ["आदेश", "फैसला", "फैसलमा", "निर्णय", "फै सला", "मुद्दा"] | |
| KEYWORDS_8 = ["न्यायाधीश", "माननीय", "न्यायधीश", "न्यायाधीस", "न्ययाधीश", "न्यायाधिश", "न्यायाधी", "न्यानायधीश", "नयायाधीश", "न्यायाधधिश", "नयाधश"] | |
| KEYWORDS_9 = [ "विरूद्ध", "बिरूद्ध", "विरुद्ध", "बिरुद्ध"] | |
| KEYWORDS_10 = ["AP", "FN", "RE", "RI", "LE", "RV", "NF", "CI", "CR", "RC", "SA", "MS", "ND", "RB", "CF", "DF", "RF", "WO", "WH", "WS", "WF", "WC", "CC", "EC"] | |
| # Extract court information | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if text and any(kw in text for kw in KEYWORDS_6): | |
| details["अदालत / इजलास"] = text | |
| ind += 1 | |
| break | |
| ind += 1 | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| # Extract judges | |
| judges = [] | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if text: | |
| if any(kw in text for kw in KEYWORDS_8): | |
| judges.append(text) | |
| if any(text.startswith(kw) for kw in KEYWORDS_7) and ("मिति" in text or "मिती" in text): | |
| details["न्यायाधीश"] = judges | |
| details["आदेश मिति"] = text | |
| ind += 1 | |
| break | |
| ind += 1 | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| # Extract case details | |
| bisaya_before_kas_no = False | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if text: | |
| if any(kw in text for kw in KEYWORDS_5): | |
| bisaya_before_kas_no = True | |
| details["विषय"] = text | |
| ind += 1 | |
| break | |
| details["केस_नम्बर"] = text | |
| break | |
| ind += 1 | |
| if ind > n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| # Handle different case structures | |
| if bisaya_before_kas_no: | |
| case_no = [] | |
| appellant = [] | |
| opposition = [] | |
| temp_flag = True | |
| while temp_flag and ind < n: | |
| # Extract case number | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if text: | |
| case_no.append(text) | |
| ind += 1 | |
| break | |
| ind += 1 | |
| # Extract appellant | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_3): | |
| if any(kw2 == text for kw2 in KEYWORDS_3) and ind + 1 < n: | |
| ind += 1 | |
| text = tags[ind].get_text(strip=True) | |
| appellant.append(text) | |
| ind += 1 | |
| break | |
| ind += 1 | |
| # Extract opposition | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_4): | |
| if any(kw2 == text for kw2 in KEYWORDS_4) and ind + 1 < n: | |
| ind += 1 | |
| text = tags[ind].get_text(strip=True) | |
| opposition.append(text) | |
| ind += 1 | |
| break | |
| ind += 1 | |
| # Look ahead without consuming: a paragraph-number marker means the party blocks are done; | |
| # a bare "versus" separator means another (case number, parties) block follows, so keep looping. | |
| temp_ind = ind | |
| for tag in tags[temp_ind:]: | |
| text = tag.get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_2): | |
| temp_flag = False | |
| details["केस_नम्बर"] = case_no | |
| details["निवेदक"] = appellant | |
| details["विपक्षी"] = opposition | |
| break | |
| elif any(kw == text for kw in KEYWORDS_9): | |
| break | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| else: | |
| # Layout B: the case number was already captured above; now read विषय, निवेदक, विपक्षी in order | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_5): | |
| details["विषय"] = text | |
| ind += 1 | |
| break | |
| ind += 1 | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_3): | |
| if any(kw2 == text for kw2 in KEYWORDS_3) and ind + 1 < n: | |
| ind += 1 | |
| text = tags[ind].get_text(strip=True) | |
| details["निवेदक"] = text | |
| ind += 1 | |
| break | |
| ind += 1 | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| while ind < n: | |
| text = tags[ind].get_text(strip=True) | |
| if any(kw in text for kw in KEYWORDS_4): | |
| if any(kw2 == text for kw2 in KEYWORDS_4) and ind + 1 < n: | |
| ind += 1 | |
| text = tags[ind].get_text(strip=True) | |
| details["विपक्षी"] = text | |
| ind += 1 | |
| break | |
| ind += 1 | |
| if ind >= n: | |
| ind = temp_ind_32 | |
| else: | |
| temp_ind_32 = ind | |
| # Extract prakarans and tahar | |
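| # प्रकरण (prakaran) = the numbered holding paragraphs, delimited by "§" bullets or "प्रकरण नं." markers; | |
| # ठहर (tahar) = the operative ruling text, everything from the "फैसला"/"आदेश" heading onward. | |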
| prakarans = [] | |
| prev = "" | |
| tahar = [] | |
| temp_flag_tahar = False | |
| for tag in tags[ind:]: | |
| text = tag.get_text(separator=' ', strip=True) | |
| if text: | |
| #if "§" in text or any(kw in text for kw in KEYWORDS_2): | |
| #prakarans.append(text) | |
| if "§" in text or any(text.startswith(kw) for kw in KEYWORDS_2) or "फैसला"==text or "आदेश"==text or "फैसलाः"==text: | |
| if any(text.startswith(kw) for kw in KEYWORDS_2): | |
| if prev:  # only flush accumulated text if it is non-empty | |
| prakarans.append(prev) | |
| prakarans.append(text) | |
| prev = "" | |
| if "§" in text: | |
| prakarans.append(text) | |
| if "फैसला"==text or "आदेश"==text or "फैसलाः"==text: | |
| if not prakarans: | |
| prakarans.append(prev) | |
| else: | |
| prev = prev + " " + text if prev else text | |
| if text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar: | |
| temp_flag_tahar = True | |
| tahar.append(text) | |
| # Process list items: paragraphs sometimes continue into sibling <ul>/<ol> blocks, so apply the same rules to each <li> | |
| next_sib = tag.find_next_sibling() | |
| while next_sib and next_sib.name in ['ul', 'ol']: | |
| for li in next_sib.find_all('li'): | |
| li_text = li.get_text(separator=' ', strip=True) | |
| if li_text: | |
| if any(li_text.startswith(kw) for kw in KEYWORDS_2): | |
| if prev:  # only flush accumulated text if it is non-empty | |
| prakarans.append(prev) | |
| prakarans.append(li_text) | |
| prev = "" | |
| else: | |
| prev = prev + " " + li_text if prev else li_text | |
| if li_text in ["फैसला", "आदेश", "फैसलाः"] or temp_flag_tahar: | |
| temp_flag_tahar = True | |
| tahar.append(li_text) | |
| next_sib = next_sib.find_next_sibling() | |
| details["प्रकरण"] = prakarans | |
| details["ठहर"] = tahar | |
| # Get HTML file path | |
| html_file_path = "" | |
| if mudda_type and sal: | |
| filename = self.generate_html_filename(url, mudda_type, sal) | |
| html_file_path = os.path.join(self.html_folder, filename) | |
| # Combine all data, handling lists and strings appropriately | |
| data = { | |
| "लिङ्क": url, | |
| "निर्णय नं.": decision_title, | |
| "भाग": bhaag, | |
| "मुद्दाको किसिम": mudda_type, | |
| "साल": saal, | |
| "महिना": mahina, | |
| "अंक": anka, | |
| "फैसला मिति": f"'{decision_date}'", | |
| "अदालत / इजलास": details.get("अदालत / इजलास", "N/A"), | |
| "न्यायाधीश": json.dumps(details.get("न्यायाधीश", []), ensure_ascii=False), | |
| "आदेश मिति": details.get("आदेश मिति", "N/A"), | |
| "केस_नम्बर": json.dumps(details.get("केस_नम्बर", []), ensure_ascii=False) if isinstance(details.get("केस_नम्बर"), list) else details.get("केस_नम्बर", "N/A"), | |
| "विषय": details.get("विषय", "N/A"), | |
| "निवेदक": json.dumps(details.get("निवेदक", []), ensure_ascii=False) if isinstance(details.get("निवेदक"), list) else details.get("निवेदक", "N/A"), | |
| "विपक्षी": json.dumps(details.get("विपक्षी", []), ensure_ascii=False) if isinstance(details.get("विपक्षी"), list) else details.get("विपक्षी", "N/A"), | |
| "प्रकरण": json.dumps(details.get("प्रकरण", []), ensure_ascii=False), | |
| "ठहर": json.dumps(details.get("ठहर", []), ensure_ascii=False), | |
| "html_file_path": html_file_path | |
| } | |
| # Save to SQLite | |
| self.save_to_sqlite(data) | |
| print(f"{url} - Successfully Scraped and Entered") | |
| return True | |
| except Exception as e: | |
| print(f"Error scraping {url}: {e}") | |
| return False | |
| def save_to_sqlite(self, data): | |
| """Save data to SQLite database""" | |
| cursor = self.conn.cursor() | |
| try: | |
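| # INSERT OR REPLACE keys on the UNIQUE constraint on लिङ्क: re-scraping a link replaces | |
| # its row (note that the replaced row receives a fresh autoincrement id). | |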
| cursor.execute(''' | |
| INSERT OR REPLACE INTO cases ( | |
| लिङ्क, निर्णय_नं, भाग, मुद्दाको_किसिम, साल, महिना, अंक, फैसला_मिति, | |
| अदालत_वा_इजलास, न्यायाधीश, आदेश_मिति, केस_नम्बर, विषय, निवेदक, विपक्षी, | |
| प्रकरण, ठहर, html_file_path | |
| ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) | |
| ''', ( | |
| data["लिङ्क"], data["निर्णय नं."], data["भाग"], data["मुद्दाको किसिम"], | |
| data["साल"], data["महिना"], data["अंक"], data["फैसला मिति"], | |
| data["अदालत / इजलास"], data["न्यायाधीश"], data["आदेश मिति"], data["केस_नम्बर"], | |
| data["विषय"], data["निवेदक"], data["विपक्षी"], data["प्रकरण"], data["ठहर"], | |
| data["html_file_path"] | |
| )) | |
| self.conn.commit() | |
| except sqlite3.Error as e: | |
| print(f"Database error: {e}") | |
| raise | |
| def save_failed_links(self, failed_links, mudda_type, sal, error_msg="Unknown error"): | |
| """Save failed links to SQLite database""" | |
| if failed_links: | |
| cursor = self.conn.cursor() | |
| for link in failed_links: | |
| try: | |
| cursor.execute(''' | |
| INSERT INTO failed_links (मुद्दाको_किसिम, साल, लिङ्क, error_message, retry_count) | |
| VALUES (?, ?, ?, ?, ?) | |
| ''', (mudda_type, sal, link, error_msg, 1)) | |
| except sqlite3.Error as e: | |
| print(f"Error saving failed link {link}: {e}") | |
| try: | |
| self.conn.commit() | |
| except sqlite3.Error as e: | |
| print(f"Error committing failed links: {e}") | |
| def test_single_link(self, url, mudda_type=None, sal=None, use_saved=True): | |
| """Test scraping a single link""" | |
| print(f"Testing single link: {url}") | |
| # If mudda_type and sal were not provided, try to recover them from previously scraped rows | |
| if not mudda_type or not sal: | |
| cursor = self.conn.cursor() | |
| cursor.execute('SELECT मुद्दाको_किसिम, साल FROM cases WHERE लिङ्क = ?', (url,)) | |
| result = cursor.fetchone() | |
| if result: | |
| mudda_type, sal = result | |
| print(f"Found existing data: mudda_type={mudda_type}, sal={sal}") | |
| if not mudda_type or not sal: | |
| print("Warning: mudda_type and sal not provided and couldn't be determined from existing data") | |
| print("Using generic scraping without HTML file management") | |
| success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved) | |
| if success: | |
| print("✓ Successfully scraped and saved to database") | |
| else: | |
| print("✗ Failed to scrape") | |
| return success | |
| def test_saved_html_files(self, mudda_type=None, sal=None, limit=None): | |
| """Test scraping from saved HTML files""" | |
| html_files = self.get_saved_html_files_by_criteria(mudda_type, sal) | |
| if not html_files: | |
| print("No saved HTML files found matching criteria") | |
| return | |
| print(f"Found {len(html_files)} saved HTML files") | |
| if limit: | |
| html_files = html_files[:limit] | |
| print(f"Testing first {limit} files") | |
| successful_count = 0 | |
| failed_count = 0 | |
| for html_file in html_files: | |
| file_mudda_type, file_sal, link_number = self.extract_info_from_filename(html_file) | |
| if not file_mudda_type or not file_sal: | |
| print(f"Could not extract info from filename: {html_file}") | |
| failed_count += 1 | |
| continue | |
| # Reconstruct the URL from the filename's link number (assumes the standard /full_detail/<id> pattern) | |
| url = f"https://nkp.gov.np/full_detail/{link_number}" | |
| print(f"Testing {html_file} -> {file_mudda_type}, {file_sal}") | |
| success = self.scrape_case_details_generic(url, file_mudda_type, file_sal, use_saved=True) | |
| if success: | |
| successful_count += 1 | |
| else: | |
| failed_count += 1 | |
| print(f"\nTest Results:") | |
| print(f"✓ Successful: {successful_count}") | |
| print(f"✗ Failed: {failed_count}") | |
| print(f"Total: {len(html_files)}") | |
| def run_scraper(self, mudda_type, sal, use_saved=True): | |
| """Main method to run the scraper""" | |
| print(f"Starting scraper for mudda_type: {mudda_type}, sal: {sal}") | |
| print(f"Using database: {self.output_db}") | |
| print(f"HTML folder: {self.html_folder}") | |
| print(f"Use saved HTML files: {use_saved}") | |
| # Validate inputs | |
| if mudda_type not in self.mudda_type_arr: | |
| raise ValueError(f"Invalid mudda_type. Must be one of: {self.mudda_type_arr}") | |
| # Generate search URL | |
| try: | |
| search_url = self.search_url(mudda_type, sal) | |
| print(f"Search URL: {search_url}") | |
| except Exception as e: | |
| print(f"Error generating search URL: {e}") | |
| return | |
| # Get all case URLs | |
| print("Fetching all case URLs...") | |
| case_urls = self.get_all_pages(search_url, mudda_type, sal, use_saved) | |
| if not case_urls: | |
| print("No case URLs found!") | |
| return | |
| print(f"Found {len(case_urls)} case URLs to scrape") | |
| # Scrape each case | |
| successful_count = 0 | |
| failed_links = [] | |
| for i, url in enumerate(case_urls, 1): | |
| print(f"Processing {i}/{len(case_urls)}: {url}") | |
| success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved) | |
| if success: | |
| successful_count += 1 | |
| else: | |
| failed_links.append(url) | |
| # Add delay between requests only if downloading from web | |
| if not use_saved: | |
| time.sleep(2) | |
| # Retry failed links once | |
| if failed_links: | |
| print(f"\nRetrying {len(failed_links)} failed links...") | |
| still_failed = [] | |
| for i, url in enumerate(failed_links, 1): | |
| print(f"Retrying {i}/{len(failed_links)}: {url}") | |
| success = self.scrape_case_details_generic(url, mudda_type, sal, use_saved=False) # Force web download on retry | |
| if success: | |
| successful_count += 1 | |
| else: | |
| still_failed.append(url) | |
| time.sleep(2) | |
| # Save permanently failed links | |
| if still_failed: | |
| self.save_failed_links(still_failed, mudda_type, sal, "Failed after retry") | |
| print(f"\nFinal Results:") | |
| print(f"Total links found: {len(case_urls)}") | |
| print(f"Successfully scraped: {successful_count}") | |
| print(f"Failed to scrape: {len(still_failed)}") | |
| if still_failed: | |
| print(f"Failed links saved to database: failed_links table") | |
| else: | |
| print(f"\nResults:") | |
| print(f"Total links found: {len(case_urls)}") | |
| print(f"Successfully scraped: {successful_count}") | |
| print(f"Scraped data saved to SQLite database: {self.output_db}") | |
| def close(self): | |
| """Explicitly close the database connection""" | |
| if hasattr(self, 'conn'): | |
| self.conn.close() | |
| def __del__(self): | |
| """Close SQLite connection when the object is destroyed""" | |
| self.close() | |
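| # Example programmatic use (a sketch with illustrative values; the class is normally driven via the CLI below): | |
| # scraper = LegalCaseScraper(output_db="cases.db", html_folder="scraped_html") | |
| # scraper.run_scraper(mudda_type="रिट", sal="२०७०", use_saved=True) | |
| # scraper.close() | |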
| def create_parser(): | |
| """Create command line argument parser""" | |
| parser = argparse.ArgumentParser( | |
| description="Legal Case Scraper for Nepal Kanoon Patrika", | |
| formatter_class=argparse.RawDescriptionHelpFormatter, | |
| epilog=""" | |
| Examples: | |
| # Scrape specific mudda_type and year | |
| python app.py --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३" --database_name "app_test_db.db" | |
| # Test a specific link | |
| python app.py --test_link "https://nkp.gov.np/full_detail/8035" --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३" | |
| # Test saved HTML files | |
| python app.py --test_saved --nepali_year "२०७३" --limit 5 | |
| # Use saved HTML files for scraping (faster) | |
| python app.py --mudda_type "दुनियाबादी देवानी" --nepali_year "२०७३" --use_saved | |
| # List available mudda types | |
| python app.py --list_mudda_types | |
| """ | |
| ) | |
| parser.add_argument('--mudda_type', type=str, | |
| help='Mudda type (e.g., "दुनियाबादी देवानी")') | |
| parser.add_argument('--nepali_year', type=str, | |
| help='Nepali year (e.g., "२०७३")') | |
| parser.add_argument('--database_name', type=str, default='legal_cases_2.db', | |
| help='SQLite database filename (default: legal_cases_2.db)') | |
| parser.add_argument('--html_folder', type=str, default='scraped_html', | |
| help='Folder to store HTML files (default: scraped_html)') | |
| parser.add_argument('--use_saved', action='store_true', | |
| help='Use saved HTML files when available (faster)') | |
| parser.add_argument('--test_link', type=str, | |
| help='Test scraping a specific link') | |
| parser.add_argument('--test_saved', action='store_true', | |
| help='Test scraping from saved HTML files') | |
| parser.add_argument('--limit', type=int, | |
| help='Limit number of files to test (use with --test_saved)') | |
| parser.add_argument('--list_mudda_types', action='store_true', | |
| help='List all available mudda types') | |
| return parser | |
| def main(): | |
| """Main function to run the application""" | |
| parser = create_parser() | |
| args = parser.parse_args() | |
| # List mudda types if requested | |
| if args.list_mudda_types: | |
| temp_scraper = LegalCaseScraper() | |
| print("Available mudda_type options:") | |
| for i, option in enumerate(temp_scraper.mudda_type_arr, 1): | |
| print(f"{i}. {option}") | |
| temp_scraper.close() | |
| return | |
| # Create the scraper | |
| scraper = LegalCaseScraper( | |
| output_db=args.database_name, | |
| html_folder=args.html_folder | |
| ) | |
| try: | |
| # Test single link | |
| if args.test_link: | |
| scraper.test_single_link( | |
| args.test_link, | |
| args.mudda_type, | |
| args.nepali_year, | |
| use_saved=args.use_saved | |
| ) | |
| return | |
| # Test saved HTML files | |
| if args.test_saved: | |
| scraper.test_saved_html_files( | |
| mudda_type=args.mudda_type, | |
| sal=args.nepali_year, | |
| limit=args.limit | |
| ) | |
| return | |
| # Regular scraping | |
| if not args.mudda_type or not args.nepali_year: | |
| print("Error: --mudda_type and --nepali_year are required for scraping") | |
| print("Use --help for usage examples") | |
| return | |
| scraper.run_scraper( | |
| mudda_type=args.mudda_type, | |
| sal=args.nepali_year, | |
| use_saved=args.use_saved | |
| ) | |
| except Exception as e: | |
| print(f"Error: {e}") | |
| finally: | |
| scraper.close() | |
| if __name__ == "__main__": | |
| main() | |