Spaces:
Running
Running
| """ | |
| Bharat Tech Atlas โ Database layer using SQLite with R-Tree spatial indexing. | |
| """ | |
| import sqlite3 | |
| import os | |
| import json | |
| import math | |
| DB_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "bharattechatlas.db") | |
| def get_db(): | |
| conn = sqlite3.connect(DB_PATH) | |
| conn.row_factory = sqlite3.Row | |
| conn.execute("PRAGMA journal_mode=WAL") | |
| conn.execute("PRAGMA foreign_keys=ON") | |
| conn.execute("PRAGMA cache_size=-8000") | |
| conn.execute("PRAGMA mmap_size=67108864") | |
| conn.execute("PRAGMA synchronous=NORMAL") | |
| conn.execute("PRAGMA temp_store=MEMORY") | |
| return conn | |
| def init_db(): | |
| conn = get_db() | |
| conn.executescript(""" | |
| CREATE TABLE IF NOT EXISTS entities ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| name TEXT NOT NULL, | |
| slug TEXT UNIQUE NOT NULL, | |
| entity_type TEXT NOT NULL CHECK (entity_type IN ( | |
| 'startup', 'sme', 'college_ecell', 'incubator', | |
| 'accelerator', 'coworking', 'investor' | |
| )), | |
| sectors TEXT NOT NULL DEFAULT '[]', | |
| dpiit_category TEXT, | |
| business_model TEXT CHECK (business_model IN ( | |
| 'lifestyle', 'scalable', 'social', 'large_company', NULL | |
| )), | |
| stage TEXT CHECK (stage IN ( | |
| 'ideation', 'validation', 'early_traction', | |
| 'scaling', 'mature', NULL | |
| )), | |
| dpiit_recognized INTEGER DEFAULT 0, | |
| nsa_winner INTEGER DEFAULT 0, | |
| nsa_category TEXT, | |
| is_women_led INTEGER DEFAULT 0, | |
| is_rural_impact INTEGER DEFAULT 0, | |
| is_campus_startup INTEGER DEFAULT 0, | |
| unicorn_status TEXT CHECK (unicorn_status IN ( | |
| 'unicorn', 'soonicorn', NULL | |
| )), | |
| funding_inr REAL DEFAULT 0, | |
| funding_stage TEXT, | |
| last_funding_date TEXT, | |
| funding_rounds TEXT DEFAULT '[]', | |
| valuation_usd REAL, | |
| description TEXT, | |
| website TEXT, | |
| logo_url TEXT, | |
| linkedin_url TEXT, | |
| instagram_url TEXT, | |
| facebook_url TEXT, | |
| twitter_url TEXT, | |
| linkedin_team_size INTEGER, | |
| linkedin_industry TEXT, | |
| linkedin_specialties TEXT, | |
| investors TEXT DEFAULT '[]', | |
| ynos_profile_url TEXT, | |
| address TEXT, | |
| city TEXT NOT NULL, | |
| district TEXT, | |
| state TEXT NOT NULL, | |
| pin_code TEXT, | |
| latitude REAL NOT NULL, | |
| longitude REAL NOT NULL, | |
| founded_year INTEGER, | |
| employee_count INTEGER, | |
| college_name TEXT, | |
| data_sources TEXT DEFAULT '[]', | |
| created_at TEXT DEFAULT (datetime('now')), | |
| updated_at TEXT DEFAULT (datetime('now')), | |
| is_active INTEGER DEFAULT 1 | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type) WHERE is_active = 1; | |
| CREATE INDEX IF NOT EXISTS idx_entities_city ON entities(city); | |
| CREATE INDEX IF NOT EXISTS idx_entities_state ON entities(state); | |
| CREATE INDEX IF NOT EXISTS idx_entities_slug ON entities(slug); | |
| CREATE INDEX IF NOT EXISTS idx_entities_founded ON entities(founded_year); | |
| CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name COLLATE NOCASE); | |
| CREATE INDEX IF NOT EXISTS idx_entities_dpiit_cat ON entities(dpiit_category); | |
| CREATE INDEX IF NOT EXISTS idx_entities_biz_model ON entities(business_model); | |
| CREATE INDEX IF NOT EXISTS idx_entities_unicorn ON entities(unicorn_status); | |
| CREATE INDEX IF NOT EXISTS idx_entities_funding ON entities(funding_inr DESC) WHERE is_active = 1; | |
| CREATE INDEX IF NOT EXISTS idx_entities_type_state ON entities(entity_type, state) WHERE is_active = 1; | |
| CREATE INDEX IF NOT EXISTS idx_entities_type_funding ON entities(entity_type, funding_inr DESC) WHERE is_active = 1; | |
| CREATE INDEX IF NOT EXISTS idx_entities_active_type ON entities(is_active, entity_type); | |
| CREATE INDEX IF NOT EXISTS idx_entities_active_latlon ON entities(is_active, latitude, longitude); | |
| """) | |
| conn.execute(""" | |
| CREATE VIRTUAL TABLE IF NOT EXISTS entities_rtree | |
| USING rtree(id, min_lng, max_lng, min_lat, max_lat) | |
| """) | |
| conn.executescript(""" | |
| CREATE TABLE IF NOT EXISTS sectors ( | |
| slug TEXT PRIMARY KEY, | |
| label TEXT NOT NULL, | |
| parent_slug TEXT, | |
| icon TEXT, | |
| color TEXT, | |
| category TEXT DEFAULT 'top_sector', | |
| FOREIGN KEY (parent_slug) REFERENCES sectors(slug) | |
| ); | |
| INSERT OR IGNORE INTO sectors (slug, label, icon, color, category) VALUES | |
| ('fintech', 'FinTech', '๐ณ', '#3B82F6', 'top_sector'), | |
| ('saas_ai', 'SaaS / AI', 'โ๏ธ', '#6366F1', 'top_sector'), | |
| ('ecommerce', 'E-Commerce', '๐', '#F59E0B', 'top_sector'), | |
| ('healthcare', 'Healthcare', '๐ฅ', '#10B981', 'top_sector'), | |
| ('manufacturing', 'Manufacturing', '๐ญ', '#78716C', 'top_sector'), | |
| ('edtech', 'EdTech', '๐', '#8B5CF6', 'dpiit_category'), | |
| ('agritech', 'AgriTech', '๐พ', '#84CC16', 'dpiit_category'), | |
| ('cleantech', 'CleanTech', '๐ฟ', '#22C55E', 'dpiit_category'), | |
| ('deeptech', 'DeepTech', '๐ฌ', '#EC4899', 'dpiit_category'), | |
| ('logistics', 'Logistics', '๐', '#F97316', 'dpiit_category'), | |
| ('gaming', 'Gaming', '๐ฎ', '#EF4444', 'dpiit_category'), | |
| ('ai_ml', 'AI / ML', '๐ค', '#7C3AED', 'dpiit_category'), | |
| ('cybersecurity', 'Cybersecurity', '๐', '#0EA5E9', 'dpiit_category'), | |
| ('foodtech', 'FoodTech', '๐', '#D97706', 'dpiit_category'), | |
| ('proptech', 'PropTech', '๐ ', '#14B8A6', 'dpiit_category'), | |
| ('legaltech', 'LegalTech', 'โ๏ธ', '#64748B', 'dpiit_category'), | |
| ('mediatech', 'MediaTech', '๐บ', '#E11D48', 'dpiit_category'), | |
| ('mobility', 'Mobility', '๐', '#0891B2', 'dpiit_category'), | |
| ('social_impact', 'Social Impact', '๐', '#059669', 'dpiit_category'), | |
| ('biotech', 'BioTech', '๐งฌ', '#A855F7', 'dpiit_category'), | |
| ('spacetech', 'SpaceTech', '๐', '#1D4ED8', 'dpiit_category'), | |
| ('d2c', 'D2C / E-Commerce', '๐๏ธ', '#F59E0B', 'dpiit_category'), | |
| ('saas', 'SaaS', '๐ป', '#6366F1', 'dpiit_category'), | |
| ('healthtech', 'HealthTech', '๐', '#10B981', 'dpiit_category'), | |
| ('iot', 'IoT', '๐ก', '#0D9488', 'dpiit_category'), | |
| ('drone_tech', 'Drone Tech', '๐ธ', '#4F46E5', 'dpiit_category'), | |
| ('ev', 'EV / E-Mobility', '๐', '#16A34A', 'dpiit_category'), | |
| ('insurtech', 'InsurTech', '๐ก๏ธ', '#2563EB', 'dpiit_category'), | |
| ('wealthtech', 'WealthTech', '๐', '#7C3AED', 'dpiit_category'); | |
| """) | |
| conn.commit() | |
| conn.close() | |
| def haversine_distance(lat1, lon1, lat2, lon2): | |
| R = 6371 | |
| dlat = math.radians(lat2 - lat1) | |
| dlon = math.radians(lon2 - lon1) | |
| a = (math.sin(dlat / 2) ** 2 + | |
| math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * | |
| math.sin(dlon / 2) ** 2) | |
| c = 2 * math.asin(math.sqrt(a)) | |
| return R * c | |