# Captured from a hosted "Spaces" file view (status: Running) - revision a25ac93, file size 5,536 bytes.
#!/usr/bin/env python3
"""
CSV to JSON Converter
Converts dialect CSV files from sheets_output/ to JSON format for index.html
"""
import csv
import json
import os
from pathlib import Path
from collections import defaultdict
# Directory layout: everything is resolved from the folder two levels above
# this file, so the script works regardless of the current working directory.
BASE_DIR = Path(__file__).parent.parent
SHEETS_OUTPUT_DIR = BASE_DIR.joinpath("sheets_output")   # CSV input folder
JSON_OUTPUT_DIR = BASE_DIR.joinpath("data", "processed")  # JSON output folder
def _parse_coordinate(value):
    """Return a CSV coordinate cell as a float.

    Blank or missing cells (``None`` for short rows) become ``0.0``, the same
    default the converter uses when the column is absent. A non-numeric cell
    is reported and also mapped to ``0.0`` so that a single bad cell does not
    abort the conversion of the whole file.
    """
    text = (value or '').strip()
    if not text:
        return 0.0
    try:
        return float(text)
    except ValueError:
        print(f"⚠️ Ignoring invalid coordinate value: {value!r}")
        return 0.0


def convert_processed_dialects(csv_file=None, json_file=None):
    """Convert processed_dialects.csv to JSON format.

    Rows are grouped by their ``District`` column. The district metadata
    (``Latitude``, ``Longitude``, ``Region``, ``History``) is taken from the
    first row seen for each district; every row with a non-empty
    ``Telugu_Word`` contributes one ``{'t', 'm', 's'}`` entry to that
    district's ``words`` list. The result is written as a JSON list of
    district objects, in order of first appearance.

    Args:
        csv_file: Optional path of the CSV to read. Defaults to
            ``SHEETS_OUTPUT_DIR / "processed_dialects.csv"``.
        json_file: Optional path of the JSON file to write. Defaults to
            ``JSON_OUTPUT_DIR / "processed_dialects.json"``. Its parent
            directory is created if needed.

    Returns:
        True when the JSON file was written, False when the CSV is missing
        or any error occurred (the error is printed, not raised).
    """
    if csv_file is None:
        csv_file = SHEETS_OUTPUT_DIR / "processed_dialects.csv"
    if json_file is None:
        json_file = JSON_OUTPUT_DIR / "processed_dialects.json"
    csv_file = Path(csv_file)
    json_file = Path(json_file)

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    try:
        # A plain dict keeps districts in order of first appearance.
        districts_by_name = {}

        # 'utf-8-sig' drops a leading BOM that would otherwise corrupt the
        # first header name; newline='' is what the csv module expects.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # Short rows yield None for missing cells, hence "or ''".
                district_name = (row.get('District') or '').strip()
                if not district_name:
                    continue

                district = districts_by_name.get(district_name)
                if district is None:
                    # Metadata comes from the first row of each district.
                    district = {
                        'name': district_name,
                        'lat': _parse_coordinate(row.get('Latitude')),
                        'lng': _parse_coordinate(row.get('Longitude')),
                        'region': row.get('Region') or '',
                        'history': row.get('History') or '',
                        'words': [],
                    }
                    districts_by_name[district_name] = district

                word_entry = {
                    't': row.get('Telugu_Word') or '',
                    'm': row.get('Meaning') or '',
                    's': row.get('Source') or '',
                }
                if word_entry['t']:  # Only add if Telugu word exists
                    district['words'].append(word_entry)

        districts = list(districts_by_name.values())

        # Write JSON
        json_file.parent.mkdir(parents=True, exist_ok=True)
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(districts, f, ensure_ascii=False, indent=2)

        word_total = sum(len(d['words']) for d in districts)
        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        print(f" {len(districts)} districts, {word_total} words")
        return True
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller go on.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False
def convert_digiwords_grouped(csv_file=None, json_file=None):
    """Convert digiwords_grouped.csv to JSON format.

    Each row with a non-empty ``State``, ``District`` and ``Telugu_Word`` is
    turned into a ``{'t', 'm', 's'}`` entry and grouped as
    ``{state: {district: [entries]}}``. Only the states "Telangana" and
    "Andhra Pradesh" are kept; both keys are always present in the output,
    even when empty. Rows naming any other state are counted and reported.

    Args:
        csv_file: Optional path of the CSV to read. Defaults to
            ``SHEETS_OUTPUT_DIR / "digiwords_grouped.csv"``.
        json_file: Optional path of the JSON file to write. Defaults to
            ``JSON_OUTPUT_DIR / "digiwords_grouped.json"``. Its parent
            directory is created if needed.

    Returns:
        True when the JSON file was written, False when the CSV is missing
        or any error occurred (the error is printed, not raised).
    """
    if csv_file is None:
        csv_file = SHEETS_OUTPUT_DIR / "digiwords_grouped.csv"
    if json_file is None:
        json_file = JSON_OUTPUT_DIR / "digiwords_grouped.json"
    csv_file = Path(csv_file)
    json_file = Path(json_file)

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    states = ("Telangana", "Andhra Pradesh")
    try:
        grouped = {state: defaultdict(list) for state in states}
        unknown_state_rows = 0

        # 'utf-8-sig' drops a leading BOM that would otherwise corrupt the
        # first header name; newline='' is what the csv module expects.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # Short rows yield None for missing cells, hence "or ''".
                state = (row.get('State') or '').strip()
                district = (row.get('District') or '').strip()
                if not state or not district:
                    continue

                telugu_word = row.get('Telugu_Word') or ''
                if not telugu_word:
                    continue
                if state not in grouped:
                    unknown_state_rows += 1
                    continue

                grouped[state][district].append({
                    't': telugu_word,
                    'm': row.get('Meaning') or '',
                    # Blank or missing source falls back to 'Crowd'.
                    's': row.get('Source') or 'Crowd',
                })

        # Convert defaultdict to regular dict
        output = {state: dict(districts) for state, districts in grouped.items()}

        # Write JSON
        json_file.parent.mkdir(parents=True, exist_ok=True)
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        for state in states:
            districts = output[state]
            word_count = sum(len(words) for words in districts.values())
            print(f" {state}: {len(districts)} districts, {word_count} words")
        if unknown_state_rows:
            print(f"⚠️ Skipped {unknown_state_rows} rows with an unrecognized state")
        return True
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller go on.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False
def main():
    """Convert all CSV files to JSON and print a summary.

    Runs every converter in turn (a failure in one does not stop the
    others) and reports how many of them succeeded. Returns nothing.
    """
    print("🔄 Starting CSV to JSON conversion...")
    print(f"📁 Input: {SHEETS_OUTPUT_DIR}")
    print(f"📁 Output: {JSON_OUTPUT_DIR}\n")

    converters = (convert_processed_dialects, convert_digiwords_grouped)
    # Build the full result list first so every converter is executed.
    results = [convert() for convert in converters]
    success_count = sum(1 for succeeded in results if succeeded)

    # The total is derived from the converter list instead of hard-coded.
    print(f"\n✨ Conversion complete: {success_count}/{len(converters)} files successfully converted")


if __name__ == "__main__":
    main()
|