# indic-faker / app.py
# Author: adwaith06
# Last commit: "Upload folder using huggingface_hub" (0c82f0e, verified)
import gradio as gr
import pandas as pd
import sys
sys.path.insert(0, "src")
from indic_faker import IndicFaker
# Dropdown label -> language code passed to IndicFaker(language=...).
# Insertion order is the order in which the choices are listed in the UI.
LANGUAGES = {
    "🇮🇳 Hindi (हिन्दी)": "hi",
    "🇮🇳 Malayalam (മലയാളം)": "ml",
    "🇮🇳 Tamil (தமிழ்)": "ta",
    "🇮🇳 Telugu (తెలుగు)": "te",
    "🇮🇳 Bengali (বাংলা)": "bn",
    "🇮🇳 Kannada (ಕನ್ನಡ)": "kn",
    "🇮🇳 Gujarati (ગુજરાતી)": "gu",
    "🇮🇳 Marathi (मराठी)": "mr",
}

# Every field name offered in the "Fields to Include" checkbox group.
FIELD_OPTIONS = [
    # personal
    "name",
    "name_native",
    "gender",
    "dob",
    "age",
    "language",
    # identity documents
    "aadhaar",
    "pan",
    # contact
    "phone",
    "email",
    # location
    "address",
    "city",
    "state",
    "pincode",
    # banking
    "bank_account",
    "upi_id",
    # career
    "employer",
    "job_title",
    "salary",
    # education
    "college",
    "degree",
]

# Fields that are pre-ticked in the checkbox group and used by
# generate_profiles() when the selection is empty.
DEFAULT_FIELDS = [
    "name",
    "name_native",
    "gender",
    "age",
    "phone",
    "email",
    "city",
    "degree",
    "job_title",
    "salary",
]
def generate_profiles(language, num_rows, selected_fields):
    """Build a table of synthetic profiles for the batch tab.

    Args:
        language: Dropdown label; looked up in ``LANGUAGES``, with ``"hi"``
            used when the label is not a known key.
        num_rows: Requested row count; converted with ``int()`` and clamped
            to the range 1-100.
        selected_fields: Field names to include; an empty (falsy) selection
            falls back to ``DEFAULT_FIELDS``.

    Returns:
        The result of ``IndicFaker.to_dataframe`` for that row count and
        field list.
    """
    code = LANGUAGES.get(language, "hi")
    columns = selected_fields or DEFAULT_FIELDS

    # Clamp the requested count so a single request stays between 1 and 100.
    row_count = int(num_rows)
    if row_count > 100:
        row_count = 100
    if row_count < 1:
        row_count = 1

    return IndicFaker(language=code).to_dataframe(row_count, fields=columns)
def generate_single(language):
    """Render one synthetic profile as an HTML card for the single-profile tab.

    Every profile value is converted to text and HTML-escaped before being
    placed in the markup, so a value containing ``&``, ``<``, ``>`` or quotes
    (for example an employer name with an ampersand) cannot break the card.

    Args:
        language: Dropdown label; looked up in ``LANGUAGES``, with ``"hi"``
            used when the label is not a known key.

    Returns:
        str: HTML fragment that uses the ``profile-card`` family of CSS
        classes.

    Raises:
        KeyError: If the generated profile lacks one of the fields shown on
            the card.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value):
        # Profile values may be non-strings (e.g. a numeric age), so
        # stringify before escaping.
        return escape(str(value))

    lang_code = LANGUAGES.get(language, "hi")
    fake = IndicFaker(language=lang_code)
    p = fake.profile()

    avatar = '👨' if p['gender'] == 'male' else '👩'

    # Build a styled card
    card = f"""
<div class="profile-card">
<div class="profile-header">
<div class="avatar">{avatar}</div>
<div class="header-info">
<h2>{esc(p['name'])}</h2>
<h3>{esc(p['name_native'])}</h3>
<span class="badge badge-lang">{esc(p['language'].upper())}</span>
<span class="badge badge-gender">{esc(p['gender'].title())}</span>
<span class="badge badge-age">{esc(p['age'])} years</span>
</div>
</div>
<div class="profile-grid">
<div class="info-section">
<h4>🆔 Identity</h4>
<div class="info-row"><span class="label">Aadhaar</span><span class="value mono">{esc(p['aadhaar'])}</span></div>
<div class="info-row"><span class="label">PAN</span><span class="value mono">{esc(p['pan'])}</span></div>
<div class="info-row"><span class="label">DOB</span><span class="value">{esc(p['dob'])}</span></div>
</div>
<div class="info-section">
<h4>📞 Contact</h4>
<div class="info-row"><span class="label">Phone</span><span class="value mono">{esc(p['phone'])}</span></div>
<div class="info-row"><span class="label">Email</span><span class="value">{esc(p['email'])}</span></div>
<div class="info-row"><span class="label">UPI</span><span class="value mono">{esc(p['upi_id'])}</span></div>
</div>
<div class="info-section">
<h4>💼 Career</h4>
<div class="info-row"><span class="label">Title</span><span class="value">{esc(p['job_title'])}</span></div>
<div class="info-row"><span class="label">Employer</span><span class="value">{esc(p['employer'])}</span></div>
<div class="info-row"><span class="label">Salary</span><span class="value salary">{esc(p['salary'])}</span></div>
</div>
<div class="info-section">
<h4>🎓 Education</h4>
<div class="info-row"><span class="label">Degree</span><span class="value">{esc(p['degree'])}</span></div>
<div class="info-row"><span class="label">College</span><span class="value">{esc(p['college'])}</span></div>
</div>
<div class="info-section full-width">
<h4>📍 Address</h4>
<div class="info-row"><span class="label">Address</span><span class="value">{esc(p['address'])}</span></div>
<div class="info-row"><span class="label">City</span><span class="value">{esc(p['city'])}</span></div>
<div class="info-row"><span class="label">State</span><span class="value">{esc(p['state'])}</span></div>
<div class="info-row"><span class="label">Pincode</span><span class="value mono">{esc(p['pincode'])}</span></div>
</div>
<div class="info-section full-width">
<h4>🏦 Banking</h4>
<div class="info-row"><span class="label">Account</span><span class="value mono">{esc(p['bank_account'])}</span></div>
<div class="info-row"><span class="label">UPI ID</span><span class="value mono">{esc(p['upi_id'])}</span></div>
</div>
</div>
</div>
"""
    return card
# Custom stylesheet for the demo; it is handed to demo.launch(css=css) at the
# bottom of this file. It styles the classes used by the hand-written HTML in
# this file (hero-section, stats-bar, profile-card, use-cases-grid,
# footer-section, ...) and the "generate-btn" class attached to the buttons
# through elem_classes. The final rule hides the page's <footer> element.
# NOTE(review): selectors such as .tab-nav, .gr-input, .gr-dropdown, .gr-slider
# and .gr-dataframe are not set anywhere in this file — presumably they target
# Gradio's own markup; verify they still match the Gradio version in use.
css = """
/* === Global === */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap');
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
font-family: 'Inter', -apple-system, sans-serif !important;
background: linear-gradient(160deg, #0a0a1a 0%, #1a1a2e 40%, #16213e 100%) !important;
min-height: 100vh;
}
/* === Hero Header === */
.hero-section {
text-align: center;
padding: 40px 20px 30px;
background: linear-gradient(135deg, rgba(255,107,53,0.1) 0%, rgba(99,102,241,0.1) 100%);
border-radius: 20px;
border: 1px solid rgba(255,255,255,0.06);
margin-bottom: 24px;
position: relative;
overflow: hidden;
}
.hero-section::before {
content: '';
position: absolute;
top: -50%;
left: -50%;
width: 200%;
height: 200%;
background: radial-gradient(circle at 30% 50%, rgba(255,107,53,0.06) 0%, transparent 50%),
radial-gradient(circle at 70% 50%, rgba(99,102,241,0.06) 0%, transparent 50%);
animation: pulse 8s ease-in-out infinite alternate;
}
@keyframes pulse {
0% { transform: translate(0, 0); }
100% { transform: translate(-5%, 5%); }
}
.hero-title {
font-size: 2.4em;
font-weight: 800;
letter-spacing: -0.02em;
margin: 0 0 8px;
position: relative;
background: linear-gradient(135deg, #FF6B35 0%, #F7931E 50%, #6366f1 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.hero-subtitle {
font-size: 1.05em;
color: rgba(255,255,255,0.65);
font-weight: 400;
margin: 0 0 16px;
position: relative;
}
.hero-badges {
display: flex;
gap: 8px;
justify-content: center;
position: relative;
flex-wrap: wrap;
}
.hero-badges a img {
transition: transform 0.2s;
}
.hero-badges a img:hover {
transform: translateY(-2px);
}
/* === Stats Bar === */
.stats-bar {
display: flex;
justify-content: center;
gap: 40px;
padding: 16px 0;
margin-bottom: 20px;
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 1.8em;
font-weight: 800;
background: linear-gradient(135deg, #FF6B35, #F7931E);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.stat-label {
font-size: 0.8em;
color: rgba(255,255,255,0.45);
text-transform: uppercase;
letter-spacing: 0.1em;
font-weight: 500;
}
/* === Tabs === */
.tab-nav button {
font-family: 'Inter', sans-serif !important;
font-weight: 600 !important;
font-size: 0.95em !important;
padding: 12px 24px !important;
border-radius: 12px 12px 0 0 !important;
transition: all 0.3s !important;
}
.tab-nav button.selected {
background: linear-gradient(135deg, #FF6B35, #F7931E) !important;
color: white !important;
border: none !important;
}
/* === Form Controls === */
.gr-input, .gr-dropdown, .gr-slider {
border-radius: 12px !important;
}
/* === Generate Button === */
.generate-btn {
background: linear-gradient(135deg, #FF6B35 0%, #F7931E 100%) !important;
border: none !important;
border-radius: 14px !important;
padding: 14px 32px !important;
font-size: 1.05em !important;
font-weight: 700 !important;
letter-spacing: 0.02em !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
box-shadow: 0 4px 20px rgba(255,107,53,0.3) !important;
text-transform: none !important;
}
.generate-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 30px rgba(255,107,53,0.45) !important;
}
.generate-btn:active {
transform: translateY(0) !important;
}
/* === Profile Card === */
.profile-card {
background: linear-gradient(145deg, rgba(30,30,50,0.9) 0%, rgba(20,20,40,0.95) 100%);
border: 1px solid rgba(255,255,255,0.08);
border-radius: 20px;
padding: 0;
overflow: hidden;
backdrop-filter: blur(20px);
}
.profile-header {
display: flex;
align-items: center;
gap: 20px;
padding: 28px 32px;
background: linear-gradient(135deg, rgba(255,107,53,0.15) 0%, rgba(99,102,241,0.15) 100%);
border-bottom: 1px solid rgba(255,255,255,0.06);
}
.avatar {
font-size: 3em;
width: 72px;
height: 72px;
display: flex;
align-items: center;
justify-content: center;
background: rgba(255,255,255,0.05);
border-radius: 18px;
border: 2px solid rgba(255,255,255,0.1);
}
.header-info h2 {
margin: 0;
font-size: 1.5em;
font-weight: 700;
color: #fff;
letter-spacing: -0.01em;
}
.header-info h3 {
margin: 4px 0 10px;
font-size: 1.1em;
font-weight: 400;
color: rgba(255,255,255,0.55);
}
.badge {
display: inline-block;
padding: 3px 10px;
border-radius: 20px;
font-size: 0.75em;
font-weight: 600;
letter-spacing: 0.03em;
margin-right: 6px;
}
.badge-lang {
background: rgba(99,102,241,0.2);
color: #a5b4fc;
border: 1px solid rgba(99,102,241,0.3);
}
.badge-gender {
background: rgba(255,107,53,0.15);
color: #ffb088;
border: 1px solid rgba(255,107,53,0.3);
}
.badge-age {
background: rgba(34,197,94,0.15);
color: #86efac;
border: 1px solid rgba(34,197,94,0.3);
}
.profile-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0;
padding: 0;
}
.info-section {
padding: 20px 28px;
border-bottom: 1px solid rgba(255,255,255,0.04);
border-right: 1px solid rgba(255,255,255,0.04);
}
.info-section:nth-child(even) {
border-right: none;
}
.info-section.full-width {
grid-column: 1 / -1;
border-right: none;
}
.info-section h4 {
margin: 0 0 12px;
font-size: 0.85em;
font-weight: 600;
color: rgba(255,255,255,0.4);
text-transform: uppercase;
letter-spacing: 0.08em;
}
.info-row {
display: flex;
justify-content: space-between;
align-items: center;
padding: 6px 0;
}
.info-row .label {
font-size: 0.85em;
color: rgba(255,255,255,0.45);
font-weight: 500;
}
.info-row .value {
font-size: 0.9em;
color: rgba(255,255,255,0.9);
font-weight: 500;
text-align: right;
max-width: 65%;
}
.info-row .value.mono {
font-family: 'JetBrains Mono', monospace;
font-size: 0.85em;
color: #a5b4fc;
}
.info-row .value.salary {
color: #4ade80;
font-weight: 700;
}
/* === Table === */
.gr-dataframe {
border-radius: 16px !important;
overflow: hidden !important;
}
/* === Code Block === */
.code-section {
background: rgba(15,15,30,0.8);
border: 1px solid rgba(255,255,255,0.06);
border-radius: 16px;
padding: 28px;
}
.code-section pre {
background: rgba(0,0,0,0.3) !important;
border-radius: 12px !important;
border: 1px solid rgba(255,255,255,0.06) !important;
}
.code-section code {
font-family: 'JetBrains Mono', monospace !important;
font-size: 0.88em !important;
}
/* === Use Cases Grid === */
.use-cases-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 16px;
margin-top: 20px;
}
.use-case-card {
background: linear-gradient(145deg, rgba(30,30,50,0.6) 0%, rgba(20,20,40,0.8) 100%);
border: 1px solid rgba(255,255,255,0.06);
border-radius: 14px;
padding: 20px;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
}
.use-case-card:hover {
border-color: rgba(255,107,53,0.3);
transform: translateY(-2px);
box-shadow: 0 8px 24px rgba(0,0,0,0.3);
}
.use-case-card .icon {
font-size: 1.8em;
margin-bottom: 10px;
}
.use-case-card h5 {
margin: 0 0 6px;
font-size: 1em;
font-weight: 600;
color: #fff;
}
.use-case-card p {
margin: 0;
font-size: 0.85em;
color: rgba(255,255,255,0.5);
line-height: 1.5;
}
/* === Footer === */
.footer-section {
text-align: center;
padding: 24px;
margin-top: 20px;
color: rgba(255,255,255,0.35);
font-size: 0.85em;
border-top: 1px solid rgba(255,255,255,0.05);
}
.footer-section a {
color: #FF6B35;
text-decoration: none;
}
/* === Hide default footer === */
footer { display: none !important; }
"""
# Page layout: hero banner, stats bar, three tabs (batch table, single profile
# card, quick-start docs) and a footer. The multi-line HTML/Markdown literals
# are kept flush-left so their text is exactly what the page receives.
# NOTE(review): the source this was restored from had lost its indentation, so
# the nesting below is reconstructed from the call order — confirm the two
# placements flagged further down.
with gr.Blocks(title="Indic Faker — Indian Synthetic Data Generator") as demo:
    # === Hero ===
    gr.HTML("""
<div class="hero-section">
<div class="hero-title">🇮🇳 Indic Faker</div>
<div class="hero-subtitle">
Generate realistic Indian synthetic data in <strong>8 languages</strong> with native script support.<br>
Aadhaar · PAN · UPI · Names · Addresses · Salaries — all culturally accurate.
</div>
<div class="hero-badges">
<a href="https://pypi.org/project/indic-faker/" target="_blank">
<img src="https://img.shields.io/pypi/v/indic-faker?style=for-the-badge&color=FF6B35" alt="PyPI">
</a>
<a href="https://github.com/adwaith-0/indic-faker" target="_blank">
<img src="https://img.shields.io/github/stars/adwaith-0/indic-faker?style=for-the-badge&logo=github&color=6366f1" alt="GitHub">
</a>
<a href="https://huggingface.co/datasets/adwaith06/indic-synthetic-profiles" target="_blank">
<img src="https://img.shields.io/badge/🤗_Dataset-10K_Rows-blue?style=for-the-badge" alt="HuggingFace">
</a>
</div>
</div>
""")
    gr.HTML("""
<div class="stats-bar">
<div class="stat-item">
<div class="stat-number">8</div>
<div class="stat-label">Languages</div>
</div>
<div class="stat-item">
<div class="stat-number">21+</div>
<div class="stat-label">Data Fields</div>
</div>
<div class="stat-item">
<div class="stat-number">50+</div>
<div class="stat-label">Providers</div>
</div>
<div class="stat-item">
<div class="stat-number">10K</div>
<div class="stat-label">HF Dataset</div>
</div>
</div>
""")
    with gr.Tabs():
        # === Tab 1: Batch Generate ===
        with gr.TabItem("📊 Batch Generate"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=1, min_width=300):
                    lang_dropdown = gr.Dropdown(
                        choices=list(LANGUAGES.keys()),
                        value="🇮🇳 Hindi (हिन्दी)",
                        label="🌐 Language",
                        info="Choose a language for generated data",
                    )
                    num_slider = gr.Slider(
                        minimum=1, maximum=100, value=10, step=1,
                        label="📏 Number of Profiles",
                        info="How many profiles to generate",
                    )
                    fields_select = gr.CheckboxGroup(
                        choices=FIELD_OPTIONS,
                        value=DEFAULT_FIELDS,
                        label="📋 Fields to Include",
                        info="Select the fields you want in your data",
                    )
                    generate_btn = gr.Button(
                        "🚀 Generate Data",
                        variant="primary",
                        size="lg",
                        elem_classes=["generate-btn"],
                    )
            # NOTE(review): table placed full-width below the controls row;
            # confirm it was not meant to sit inside the row.
            output_table = gr.Dataframe(
                label="Generated Profiles",
                wrap=True,
                interactive=False,
                max_height=500,
            )
            generate_btn.click(
                fn=generate_profiles,
                inputs=[lang_dropdown, num_slider, fields_select],
                outputs=output_table,
            )
        # === Tab 2: Single Profile ===
        with gr.TabItem("👤 Single Profile"):
            with gr.Row():
                with gr.Column(scale=1, min_width=250):
                    single_lang = gr.Dropdown(
                        choices=list(LANGUAGES.keys()),
                        value="🇮🇳 Kannada (ಕನ್ನಡ)",
                        label="🌐 Language",
                    )
                    single_btn = gr.Button(
                        "🎲 Generate Profile",
                        variant="primary",
                        size="lg",
                        elem_classes=["generate-btn"],
                    )
                    gr.HTML("""
<div style="margin-top:16px; padding:16px; background:rgba(255,255,255,0.03);
border-radius:12px; border:1px solid rgba(255,255,255,0.06);">
<div style="font-size:0.8em; color:rgba(255,255,255,0.4); font-weight:600;
text-transform:uppercase; letter-spacing:0.08em; margin-bottom:8px;">
💡 Tip
</div>
<div style="font-size:0.85em; color:rgba(255,255,255,0.55); line-height:1.6;">
Each click generates a unique, realistic Indian identity with
culturally accurate data — names match their native script
transliterations.
</div>
</div>
""")
                with gr.Column(scale=2):
                    profile_output = gr.HTML(
                        value="<div style='text-align:center; padding:60px; color:rgba(255,255,255,0.3); font-size:1.1em;'>Click <strong>Generate Profile</strong> to create an identity →</div>",
                    )
            single_btn.click(
                fn=generate_single,
                inputs=[single_lang],
                outputs=profile_output,
            )
        # === Tab 3: Quick Start ===
        with gr.TabItem("💻 Quick Start"):
            # The "code-section" class is attached to the Markdown component
            # itself. Emitting an opening <div> and a closing </div> from two
            # separate gr.HTML components cannot wrap a sibling component:
            # each component renders in its own container, so the div would
            # stay empty and its styles would never reach the code blocks.
            gr.Markdown("""
### Installation
```bash
pip install indic-faker
```
### Usage
```python
from indic_faker import IndicFaker
fake = IndicFaker(language="ta") # Tamil
# Single fields
print(fake.name()) # Rajesh Krishnan
print(fake.name("native")) # ராஜேஷ் கிருஷ்ணன்
print(fake.aadhaar()) # 4532 8891 2234
print(fake.pan()) # BXYPK7234R
print(fake.upi_id()) # rajesh.k@okaxis
# Full profile with all 21+ fields
profile = fake.profile()
# Batch → DataFrame (great for ML)
df = fake.to_dataframe(1000)
df.to_csv("indian_data.csv")
```
### Load from HuggingFace
```python
from datasets import load_dataset
ds = load_dataset("adwaith06/indic-synthetic-profiles")
```
""", elem_classes=["code-section"])
            # NOTE(review): use-case cards placed inside the Quick Start tab;
            # confirm they were not meant to appear below all tabs.
            gr.HTML("""
<div class="use-cases-grid">
<div class="use-case-card">
<div class="icon">🔍</div>
<h5>Fraud Detection</h5>
<p>Train ML models on synthetic Indian transaction data with realistic Aadhaar, PAN, and bank details.</p>
</div>
<div class="use-case-card">
<div class="icon">🤖</div>
<h5>LLM Fine-Tuning</h5>
<p>Create Hindi, Tamil, Telugu instruction datasets with native script names and addresses.</p>
</div>
<div class="use-case-card">
<div class="icon">🏦</div>
<h5>KYC System Testing</h5>
<p>Generate realistic Aadhaar + PAN + bank account combos for fintech QA.</p>
</div>
<div class="use-case-card">
<div class="icon">📊</div>
<h5>Data Pipeline QA</h5>
<p>Fill staging databases with 10K+ realistic Indian records in seconds.</p>
</div>
<div class="use-case-card">
<div class="icon">🎓</div>
<h5>Teaching & Demos</h5>
<p>Demo data engineering and analytics with culturally relevant, privacy-safe data.</p>
</div>
<div class="use-case-card">
<div class="icon">🧪</div>
<h5>Unit Testing</h5>
<p>Seed test fixtures with varied Indian data — 8 languages, multiple formats.</p>
</div>
</div>
""")
    gr.HTML("""
<div class="footer-section">
Built with ❤️ by <a href="https://github.com/adwaith-0" target="_blank">adwaith-0</a> ·
<code>pip install indic-faker</code> ·
<a href="https://github.com/adwaith-0/indic-faker" target="_blank">GitHub</a> ·
<a href="https://huggingface.co/datasets/adwaith06/indic-synthetic-profiles" target="_blank">HuggingFace</a>
</div>
""")
if __name__ == "__main__":
    # Custom primary palette (c50 lightest ... c950 darkest) used as the
    # theme's primary hue.
    primary_palette = gr.themes.Color(
        c50="#fff7ed",
        c100="#ffedd5",
        c200="#fed7aa",
        c300="#fdba74",
        c400="#fb923c",
        c500="#f97316",
        c600="#ea580c",
        c700="#c2410c",
        c800="#9a3412",
        c900="#7c2d12",
        c950="#431407",
    )

    base_theme = gr.themes.Base(
        primary_hue=primary_palette,
        secondary_hue="indigo",
        neutral_hue="slate",
        font=["Inter", "system-ui", "sans-serif"],
        font_mono=["JetBrains Mono", "monospace"],
    )

    # Per-variable overrides layered on top of the base theme.
    app_theme = base_theme.set(
        # page and text
        body_background_fill="linear-gradient(160deg, #0a0a1a 0%, #1a1a2e 40%, #16213e 100%)",
        body_text_color="rgba(255,255,255,0.85)",
        # blocks
        block_background_fill="rgba(20,20,40,0.6)",
        block_border_color="rgba(255,255,255,0.06)",
        block_label_text_color="rgba(255,255,255,0.6)",
        block_title_text_color="rgba(255,255,255,0.8)",
        # inputs
        input_background_fill="rgba(15,15,35,0.8)",
        input_border_color="rgba(255,255,255,0.1)",
        input_placeholder_color="rgba(255,255,255,0.3)",
        # primary button
        button_primary_background_fill="linear-gradient(135deg, #FF6B35, #F7931E)",
        button_primary_text_color="white",
        button_primary_border_color="transparent",
        # checkboxes
        checkbox_background_color="rgba(15,15,35,0.8)",
        checkbox_border_color="rgba(255,255,255,0.15)",
        checkbox_label_text_color="rgba(255,255,255,0.7)",
        # shadows, borders and radii
        shadow_drop="0 4px 24px rgba(0,0,0,0.3)",
        block_shadow="0 4px 24px rgba(0,0,0,0.2)",
        block_border_width="1px",
        block_radius="16px",
        input_radius="12px",
    )

    # Start the app with the theme and the custom stylesheet defined above.
    demo.launch(theme=app_theme, css=css)