data-parser / index.html
jasvir-singh1021's picture
# filename: land_data_converter.py import streamlit as st import pandas as pd from bs4 import BeautifulSoup import io import re st.set_page_config(page_title="📋 Land Data Converter", layout="wide") st.title("📋 Comprehensive Land Record Converter") tab1, tab2 = st.tabs(["📂 Upload HTML File", "📋 Paste Raw Text"]) # --------------------- Tab 1: HTML File Upload --------------------- # with tab1: st.header("📂 Upload HTML Land Record File") uploaded_file = st.file_uploader("Upload .htm or .html file", type=["htm", "html"]) def parse_land_html(file): html_content = file.read().decode("utf-8", errors="ignore") soup = BeautifulSoup(html_content, "html.parser") rows = [] tables = soup.find_all("table") if not tables: return pd.DataFrame() for table in tables: tr_elements = table.find_all("tr") if len(tr_elements) < 2: continue first_data_row = tr_elements[1].find_all("td") if len(first_data_row) >= 7: for tr in tr_elements[1:]: # Skip header tds = tr.find_all("td") if len(tds) < 7: continue khewat = tds[0].text.strip() marba = tds[1].text.strip() killa_no = tds[6].text.strip() # share → killa kanal_raw = tds[3].text.strip() # treated as owner owner_field = tds[5].text.strip() # owner + fraction # Extract owner and fraction owner_match = re.match(r"^(.*?)\s*\((.*?)\)$", owner_field) if owner_match: owner_name = owner_match.group(1).strip() share_fraction = owner_match.group(2).strip() else: owner_name = owner_field share_fraction = "" if not owner_name and kanal_raw: owner_name = kanal_raw rows.append({ "Khewat No": khewat, "Marba No": marba, "Killa No": killa_no, "Total Area (Kanals)": "", "Total Area (Marlas)": "", "Owner Name": owner_name, "Share Fraction": share_fraction }) break return pd.DataFrame(rows) if uploaded_file: df = parse_land_html(uploaded_file) if not df.empty: st.success("✅ HTML parsed successfully!") st.dataframe(df, use_container_width=True) output = io.BytesIO() with pd.ExcelWriter(output, engine="openpyxl") as writer: df.to_excel(writer, index=False, 
sheet_name="Land Data") output.seek(0) st.download_button( label="📥 Download Excel File", data=output, file_name="converted_land_data.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) else: st.error("❌ No valid table found in the uploaded file.") # --------------------- Tab 2: Paste-Based Parser --------------------- # with tab2: st.header("📋 Parse Owner Blocks from Pasted Text") st.markdown(""" Paste owner and share information below. Each group of owner lines must be followed by a share line (e.g., `603/3076 भाग`). Any lines like `वासी`, `हर दो समभाग` will be added as narration to the last name in the group. """) pasted_text = st.text_area("📤 Paste Raw Data Below", height=300) def parse_owner_blocks(text): lines = [line.strip().strip('"') for line in text.strip().splitlines() if line.strip()] result_rows = [] current_names = [] for line in lines: clean_line = re.sub(r'\s+', ' ', line).strip() if "भाग" in clean_line and re.search(r"\d+/\d+", clean_line): share_match = re.search(r"(\d+/\d+)", clean_line) share = share_match.group(1) if share_match else "" for name in current_names: result_rows.append({ "Owner Name": name, "Share Fraction": share }) current_names = [] # reset for next block else: if current_names: current_names[-1] = f"{current_names[-1]} {clean_line}" else: current_names.append(clean_line) # Handle any remaining names without share (optional) for name in current_names: result_rows.append({ "Owner Name": name, "Share Fraction": "" }) return pd.DataFrame(result_rows) if pasted_text: df_paste = parse_owner_blocks(pasted_text) if not df_paste.empty: st.success("✅ Parsed pasted data successfully!") st.dataframe(df_paste, use_container_width=True) output2 = io.BytesIO() with pd.ExcelWriter(output2, engine='openpyxl') as writer: df_paste.to_excel(writer, index=False, sheet_name='Pasted Data') output2.seek(0) st.download_button( label="📥 Download Excel File", data=output2, file_name="parsed_pasted_data.xlsx", 
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) else: st.warning("⚠️ No data parsed.") else: st.info("⬆️ Paste data above to begin.") - Initial Deployment
cda1da7 verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>📋 Land Data Converter</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.tab-btn.active {
background-color: #3b82f6;
color: white;
}
.file-upload {
border: 2px dashed #cbd5e0;
transition: all 0.3s ease;
}
.file-upload:hover {
border-color: #3b82f6;
}
.file-upload.dragover {
border-color: #3b82f6;
background-color: #ebf4ff;
}
.data-table {
width: 100%;
overflow-x: auto;
}
.data-table table {
min-width: 100%;
}
.data-table th {
background-color: #f3f4f6;
font-weight: 600;
text-align: left;
}
.data-table tr:nth-child(even) {
background-color: #f9fafb;
}
.data-table tr:hover {
background-color: #eff6ff;
}
@media (max-width: 640px) {
.tabs-container {
flex-direction: column;
}
.tab-btn {
width: 100%;
margin-bottom: 0.5rem;
}
}
</style>
</head>
<body class="bg-gray-50 min-h-screen">
<div class="container mx-auto px-4 py-8 max-w-6xl">
<!-- Header -->
<header class="mb-8">
<h1 class="text-3xl font-bold text-gray-800 flex items-center">
<i class="fas fa-file-alt mr-3 text-blue-500"></i> Comprehensive Land Record Converter
</h1>
<p class="text-gray-600 mt-2">Convert HTML land records or parse raw text into structured Excel data</p>
</header>
<!-- Tabs -->
<div class="bg-white rounded-lg shadow-md overflow-hidden mb-8">
<div class="tabs-container flex border-b border-gray-200">
<button class="tab-btn active py-3 px-6 font-medium text-gray-700 hover:bg-blue-50 transition-colors flex items-center" data-tab="upload">
<i class="fas fa-file-upload mr-2"></i> Upload HTML File
</button>
<button class="tab-btn py-3 px-6 font-medium text-gray-700 hover:bg-blue-50 transition-colors flex items-center" data-tab="paste">
<i class="fas fa-paste mr-2"></i> Paste Raw Text
</button>
</div>
<!-- Tab 1: Upload HTML File -->
<div id="upload" class="tab-content active p-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i class="fas fa-file-upload mr-2 text-blue-500"></i> Upload HTML Land Record File
</h2>
<div id="dropArea" class="file-upload rounded-lg p-8 text-center cursor-pointer mb-6">
<input type="file" id="htmlFileInput" class="hidden" accept=".htm,.html">
<div class="flex flex-col items-center justify-center">
<i class="fas fa-cloud-upload-alt text-4xl text-blue-400 mb-3"></i>
<p class="text-gray-600 mb-2">Drag & drop your HTML file here or click to browse</p>
<p class="text-sm text-gray-500">Supported formats: .htm, .html</p>
<button id="browseBtn" class="mt-4 bg-blue-500 hover:bg-blue-600 text-white py-2 px-4 rounded-md transition-colors">
Browse Files
</button>
</div>
</div>
<div id="fileInfo" class="hidden mb-6 p-4 bg-blue-50 rounded-md border border-blue-200">
<div class="flex justify-between items-center">
<div class="flex items-center">
<i class="fas fa-file-alt text-blue-500 mr-3 text-xl"></i>
<div>
<p id="fileName" class="font-medium text-gray-800"></p>
<p id="fileSize" class="text-sm text-gray-600"></p>
</div>
</div>
<button id="removeFile" class="text-red-500 hover:text-red-700">
<i class="fas fa-times"></i>
</button>
</div>
</div>
<div id="parseResults" class="hidden">
<div class="flex items-center mb-4">
<div class="bg-green-100 text-green-800 px-3 py-1 rounded-full text-sm font-medium mr-3">
<i class="fas fa-check-circle mr-1"></i> HTML parsed successfully!
</div>
<button id="downloadExcel" class="ml-auto bg-green-500 hover:bg-green-600 text-white py-2 px-4 rounded-md transition-colors flex items-center">
<i class="fas fa-file-excel mr-2"></i> Download Excel File
</button>
</div>
<div class="data-table bg-white rounded-lg border border-gray-200 overflow-hidden mb-6">
<div class="overflow-x-auto">
<table id="resultTable" class="w-full">
<thead>
<tr>
<th class="py-3 px-4">Khewat No</th>
<th class="py-3 px-4">Marba No</th>
<th class="py-3 px-4">Killa No</th>
<th class="py-3 px-4">Total Area (Kanals)</th>
<th class="py-3 px-4">Total Area (Marlas)</th>
<th class="py-3 px-4">Owner Name</th>
<th class="py-3 px-4">Share Fraction</th>
</tr>
</thead>
<tbody id="tableBody">
<!-- Data will be inserted here -->
</tbody>
</table>
</div>
</div>
</div>
<div id="parseError" class="hidden bg-red-100 border-l-4 border-red-500 text-red-700 p-4 mb-6 rounded">
<div class="flex items-center">
<i class="fas fa-exclamation-circle mr-3 text-xl"></i>
<p>No valid table found in the uploaded file.</p>
</div>
</div>
</div>
<!-- Tab 2: Paste Raw Text -->
<div id="paste" class="tab-content p-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i class="fas fa-paste mr-2 text-blue-500"></i> Parse Owner Blocks from Pasted Text
</h2>
<div class="bg-blue-50 border-l-4 border-blue-500 p-4 mb-6 rounded">
<div class="flex">
<div class="flex-shrink-0">
<i class="fas fa-info-circle text-blue-500 mt-1"></i>
</div>
<div class="ml-3">
<p class="text-sm text-blue-700">
Paste owner and share information below. Each group of owner lines must be followed by a share line (e.g., <code class="bg-gray-200 px-1 rounded">603/3076 भाग</code>). Any lines like <code class="bg-gray-200 px-1 rounded">वासी</code>, <code class="bg-gray-200 px-1 rounded">हर दो समभाग</code> will be added as narration to the last name in the group.
</p>
</div>
</div>
</div>
<div class="mb-6">
<label for="rawTextInput" class="block text-sm font-medium text-gray-700 mb-2 flex items-center">
<i class="fas fa-arrow-up mr-2 text-gray-500"></i> Paste Raw Data Below
</label>
<textarea id="rawTextInput" rows="10" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500"></textarea>
</div>
<button id="parseTextBtn" class="bg-blue-500 hover:bg-blue-600 text-white py-2 px-4 rounded-md transition-colors flex items-center mb-6">
<i class="fas fa-cogs mr-2"></i> Parse Text
</button>
<div id="pasteResults" class="hidden">
<div class="flex items-center mb-4">
<div class="bg-green-100 text-green-800 px-3 py-1 rounded-full text-sm font-medium mr-3">
<i class="fas fa-check-circle mr-1"></i> Parsed pasted data successfully!
</div>
<button id="downloadPasteExcel" class="ml-auto bg-green-500 hover:bg-green-600 text-white py-2 px-4 rounded-md transition-colors flex items-center">
<i class="fas fa-file-excel mr-2"></i> Download Excel File
</button>
</div>
<div class="data-table bg-white rounded-lg border border-gray-200 overflow-hidden mb-6">
<div class="overflow-x-auto">
<table id="pasteResultTable" class="w-full">
<thead>
<tr>
<th class="py-3 px-4">Owner Name</th>
<th class="py-3 px-4">Share Fraction</th>
</tr>
</thead>
<tbody id="pasteTableBody">
<!-- Data will be inserted here -->
</tbody>
</table>
</div>
</div>
</div>
<div id="pasteError" class="hidden bg-yellow-100 border-l-4 border-yellow-500 text-yellow-700 p-4 mb-6 rounded">
<div class="flex items-center">
<i class="fas fa-exclamation-triangle mr-3 text-xl"></i>
<p>No data parsed.</p>
</div>
</div>
<div id="pasteInfo" class="bg-blue-50 border-l-4 border-blue-500 p-4 rounded">
<div class="flex items-center">
<i class="fas fa-arrow-up mr-3 text-xl text-blue-500"></i>
<p>Paste data above to begin.</p>
</div>
</div>
</div>
</div>
<!-- Footer -->
<footer class="mt-12 text-center text-gray-500 text-sm">
<p>© 2023 Land Data Converter. All rights reserved.</p>
</footer>
</div>
<script>
// Tab switching functionality
document.addEventListener('DOMContentLoaded', function() {
// Tab switching: at most one button and one panel carry the "active" class.
const tabButtons = document.querySelectorAll('.tab-btn');
const tabContents = document.querySelectorAll('.tab-content');

// Clears "active" everywhere, then marks the clicked button and the panel
// whose id equals the button's data-tab attribute.
function activateTab(selectedButton) {
    for (const tabButton of tabButtons) {
        tabButton.classList.remove('active');
    }
    for (const tabPanel of tabContents) {
        tabPanel.classList.remove('active');
    }
    selectedButton.classList.add('active');
    const panelId = selectedButton.getAttribute('data-tab');
    document.getElementById(panelId).classList.add('active');
}

for (const tabButton of tabButtons) {
    tabButton.addEventListener('click', function onTabClick() {
        activateTab(tabButton);
    });
}
// File upload functionality for Tab 1
// Element handles for the "upload" tab; every id below is declared in this page's markup.
// Drop zone that receives the drag-and-drop listeners.
const dropArea = document.getElementById('dropArea');
// Hidden <input type="file"> inside the drop zone; its accept attribute lists .htm,.html.
const fileInput = document.getElementById('htmlFileInput');
// "Browse Files" button rendered inside the drop zone.
const browseBtn = document.getElementById('browseBtn');
// Panel (initially hidden) that shows the selected file's name and size.
const fileInfo = document.getElementById('fileInfo');
// Text targets inside the fileInfo panel.
const fileName = document.getElementById('fileName');
const fileSize = document.getElementById('fileSize');
// Button inside the fileInfo panel that discards the current selection.
const removeFile = document.getElementById('removeFile');
// Success panel (initially hidden) that wraps the result table and the download button.
const parseResults = document.getElementById('parseResults');
// Error panel (initially hidden) with the "No valid table found" message.
const parseError = document.getElementById('parseError');
// <tbody> of #resultTable; result rows are appended here.
const tableBody = document.getElementById('tableBody');
// "Download Excel File" button in the success panel.
const downloadExcel = document.getElementById('downloadExcel');
// Cancels the event's default action and stops it from propagating further.
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}

// Register the canceller for every drag-related event on the drop zone.
for (const dragEventName of ['dragenter', 'dragover', 'dragleave', 'drop']) {
    dropArea.addEventListener(dragEventName, preventDefaults, false);
}
// Adds the "dragover" styling while an item is dragged over the drop zone.
function highlight() {
    dropArea.classList.add('dragover');
}

// Removes the "dragover" styling once the drag leaves or the item is dropped.
function unhighlight() {
    dropArea.classList.remove('dragover');
}

// Event name -> visual feedback handler, registered in the listed order.
const dragFeedbackBindings = [
    ['dragenter', highlight],
    ['dragover', highlight],
    ['dragleave', unhighlight],
    ['drop', unhighlight],
];
dragFeedbackBindings.forEach(([dragEventName, feedbackHandler]) => {
    dropArea.addEventListener(dragEventName, feedbackHandler, false);
});
// Dropped files go through the same handler as files chosen in the dialog.
dropArea.addEventListener('drop', handleDrop, false);

// Forwards the file list carried by the drop event to handleFiles.
function handleDrop(e) {
    handleFiles(e.dataTransfer.files);
}
// Handle file selection via the drop zone or the browse button.
// The drop zone advertises "click to browse" and shows a pointer cursor, so a
// click anywhere inside it opens the file dialog. The "Browse Files" button
// lives inside the drop zone, so its clicks bubble up to this one listener.
dropArea.addEventListener('click', (event) => {
    // fileInput.click() dispatches a click on the hidden input, which is also
    // inside the drop zone and bubbles back here; ignore it so the handler
    // does not try to open the dialog a second time.
    if (event.target === fileInput) {
        return;
    }
    fileInput.click();
});

fileInput.addEventListener('change', () => {
    if (fileInput.files.length) {
        handleFiles(fileInput.files);
    }
    // Clear the selection so that picking the same file again still fires a
    // "change" event. handleFiles has already taken its File reference.
    fileInput.value = '';
});
// Discard the current selection: reset the input and hide every panel that
// describes the file or its parse outcome.
removeFile.addEventListener('click', function clearSelectedFile() {
    fileInput.value = '';
    [fileInfo, parseResults, parseError].forEach((panel) => {
        panel.classList.add('hidden');
    });
});
// Identifies the most recent upload so that a slow read of an earlier file
// cannot overwrite the results of a later one.
let latestUploadId = 0;

// Handles a file list coming from the picker or from a drop.
// Shows the first file's name and size, reads it as text, extracts the land
// record rows and renders them; shows the error panel when the file cannot be
// read or contains no usable table.
//   files: FileList (or array-like of File); only the first entry is used.
function handleFiles(files) {
    const file = files && files[0];
    // Dropping something that is not a file (e.g. selected text) yields an
    // empty list; there is nothing to show or parse.
    if (!file) {
        return;
    }

    // Display file info
    fileName.textContent = file.name;
    fileSize.textContent = formatFileSize(file.size);
    fileInfo.classList.remove('hidden');

    // Hide the outcome of any previous file while this one is being read.
    parseResults.classList.add('hidden');
    parseError.classList.add('hidden');

    const uploadId = ++latestUploadId;
    // Stale when a newer upload started or the file info panel was hidden
    // (the user removed the file) before the read finished.
    const isStale = () =>
        uploadId !== latestUploadId || fileInfo.classList.contains('hidden');

    const reader = new FileReader();
    reader.onload = () => {
        if (isStale()) {
            return;
        }
        const rows = parseLandHtml(String(reader.result));
        if (rows.length === 0) {
            parseResults.classList.add('hidden');
            parseError.classList.remove('hidden');
            return;
        }
        renderLandRows(rows);
        parseError.classList.add('hidden');
        parseResults.classList.remove('hidden');
    };
    reader.onerror = () => {
        if (isStale()) {
            return;
        }
        parseResults.classList.add('hidden');
        parseError.classList.remove('hidden');
    };
    reader.readAsText(file);
}

// Extracts land-record rows from HTML text.
// Uses the first <table> that has at least two rows and whose second row has
// at least seven <td> cells; the first row is treated as the header. Column
// positions mirror the Python parse_land_html reference this page was ported
// from: 0 = khewat, 1 = marba, 6 = killa, 5 = "owner (fraction)", with
// column 3 as the owner fallback when column 5 yields no name.
// Returns an array of row objects; empty when no table qualifies.
function parseLandHtml(htmlText) {
    // DOMParser builds an inert document: scripts in the upload do not run.
    const parsedDoc = new DOMParser().parseFromString(htmlText, 'text/html');
    const cellText = (cell) => cell.textContent.trim();

    for (const table of parsedDoc.querySelectorAll('table')) {
        const tableRows = Array.from(table.querySelectorAll('tr'));
        if (tableRows.length < 2 || tableRows[1].querySelectorAll('td').length < 7) {
            continue;
        }

        const records = [];
        for (const tableRow of tableRows.slice(1)) {
            const cells = tableRow.querySelectorAll('td');
            // Rows that are too short (spacers, sub-headers) carry no record.
            if (cells.length < 7) {
                continue;
            }
            const ownerField = cellText(cells[5]);
            // "Name (fraction)" -> name and fraction; otherwise the whole
            // field is the name and the fraction stays empty.
            const ownerMatch = /^(.*?)\s*\((.*?)\)$/.exec(ownerField);
            let owner = ownerMatch ? ownerMatch[1].trim() : ownerField;
            const share = ownerMatch ? ownerMatch[2].trim() : '';
            if (!owner) {
                owner = cellText(cells[3]);
            }
            records.push({
                khewat: cellText(cells[0]),
                marba: cellText(cells[1]),
                killa: cellText(cells[6]),
                kanals: '',
                marlas: '',
                owner: owner,
                share: share
            });
        }
        // Only the first qualifying table is used.
        return records;
    }
    return [];
}

// Replaces the result table body with one <tr> per record.
function renderLandRows(rows) {
    const columnKeys = ['khewat', 'marba', 'killa', 'kanals', 'marlas', 'owner', 'share'];
    tableBody.innerHTML = '';
    for (const record of rows) {
        const tableRow = document.createElement('tr');
        for (const key of columnKeys) {
            const cell = document.createElement('td');
            cell.className = 'py-2 px-4 border-b border-gray-200';
            // textContent, not innerHTML: values come from an uploaded file
            // and must never be interpreted as markup.
            cell.textContent = record[key];
            tableRow.appendChild(cell);
        }
        tableBody.appendChild(tableRow);
    }
}
// Formats a byte count as a short human-readable string, e.g. 1536 -> "1.5 KB".
// Uses binary multiples (1024) and at most two decimals with trailing zeros
// dropped. Zero, negative and non-finite input yields "0 Bytes"; values
// beyond the largest unit are expressed in that unit (e.g. "1024 TB").
//   bytes: number of bytes.
//   returns: string of the form "<number> <unit>".
function formatFileSize(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';
    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
    // Step through the units by repeated division instead of Math.log: the
    // division by 1024 is exact, so exact powers of 1024 always land on the
    // right unit, and the index can never run past the end of `sizes`.
    let value = bytes;
    let unitIndex = 0;
    while (value >= k && unitIndex < sizes.length - 1) {
        value /= k;
        unitIndex += 1;
    }
    return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
}
// Demo placeholder for the Tab 1 download button: no file is generated, the
// click only shows an explanatory notice.
downloadExcel.addEventListener('click', function showExcelDemoNotice() {
    window.alert('In a real application, this would download an Excel file with the parsed data.');
});
// Tab 2 functionality - Parse raw text
// Element handles for the "paste" tab; every id below is declared in this page's markup.
// "Parse Text" button that triggers parsing of the textarea content.
const parseTextBtn = document.getElementById('parseTextBtn');
// <textarea> holding the pasted owner/share lines.
const rawTextInput = document.getElementById('rawTextInput');
// Success panel (initially hidden) that wraps the result table and the download button.
const pasteResults = document.getElementById('pasteResults');
// Warning panel (initially hidden) with the "No data parsed." message.
const pasteError = document.getElementById('pasteError');
// Hint panel ("Paste data above to begin."); visible on load.
const pasteInfo = document.getElementById('pasteInfo');
// <tbody> of #pasteResultTable; result rows are appended here.
const pasteTableBody = document.getElementById('pasteTableBody');
// "Download Excel File" button in the paste success panel.
const downloadPasteExcel = document.getElementById('downloadPasteExcel');
// Groups pasted lines into { owner, share } rows.
// A line is a share line when it contains "भाग" and a "digits/digits"
// fraction. Every other non-empty line is appended, space-separated, to the
// owner text of the current group. A share line closes the group and emits
// one row with that fraction; a trailing group without a share line is
// emitted with an empty share. A share line with no preceding owner text
// emits nothing. This mirrors the Python parse_owner_blocks reference this
// page was ported from.
function parseOwnerBlocks(text) {
    const rows = [];
    let pendingOwner = '';
    for (const rawLine of text.split(/\r?\n/)) {
        // Trim, drop wrapping double quotes (common when copying from a
        // spreadsheet), and collapse internal runs of whitespace.
        const line = rawLine
            .trim()
            .replace(/^"+|"+$/g, '')
            .replace(/\s+/g, ' ')
            .trim();
        if (!line) {
            continue;
        }
        // "हर दो समभाग" also contains "भाग", so the fraction is required
        // before a line counts as a share line.
        const shareMatch = line.includes('भाग') ? /\d+\/\d+/.exec(line) : null;
        if (shareMatch) {
            if (pendingOwner) {
                rows.push({ owner: pendingOwner, share: shareMatch[0] });
            }
            pendingOwner = '';
        } else {
            pendingOwner = pendingOwner ? `${pendingOwner} ${line}` : line;
        }
    }
    if (pendingOwner) {
        rows.push({ owner: pendingOwner, share: '' });
    }
    return rows;
}

// Parse the textarea content and show either the result table or the
// "No data parsed." warning. Empty input restores the initial hint.
parseTextBtn.addEventListener('click', () => {
    const text = rawTextInput.value.trim();
    if (!text) {
        pasteInfo.classList.remove('hidden');
        pasteResults.classList.add('hidden');
        pasteError.classList.add('hidden');
        return;
    }

    const rows = parseOwnerBlocks(text);
    pasteInfo.classList.add('hidden');
    if (rows.length === 0) {
        pasteResults.classList.add('hidden');
        pasteError.classList.remove('hidden');
        return;
    }

    // Populate table
    pasteTableBody.innerHTML = '';
    for (const row of rows) {
        const tableRow = document.createElement('tr');
        for (const value of [row.owner, row.share]) {
            const cell = document.createElement('td');
            cell.className = 'py-2 px-4 border-b border-gray-200';
            // textContent, not innerHTML: pasted text must never be
            // interpreted as markup.
            cell.textContent = value;
            tableRow.appendChild(cell);
        }
        pasteTableBody.appendChild(tableRow);
    }
    pasteError.classList.add('hidden');
    pasteResults.classList.remove('hidden');
});
// Demo placeholder for the Tab 2 download button: no file is generated, the
// click only shows an explanatory notice.
downloadPasteExcel.addEventListener('click', function showPasteExcelDemoNotice() {
    window.alert('In a real application, this would download an Excel file with the parsed data.');
});
// Keep the hint in sync with the textarea: hide it as soon as there is any
// non-whitespace text; when the field is emptied, show it again and hide both
// outcome panels.
rawTextInput.addEventListener('input', function syncPasteHint() {
    const hasText = rawTextInput.value.trim() !== '';
    if (hasText) {
        pasteInfo.classList.add('hidden');
        return;
    }
    pasteInfo.classList.remove('hidden');
    [pasteResults, pasteError].forEach((panel) => {
        panel.classList.add('hidden');
    });
});
});
</script>
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=jasvir-singh1021/data-parser" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
</html>