Spaces:
Sleeping
Sleeping
Deploy Receipt Scanner
Browse files- README.md +54 -5
- app.py +240 -0
- requirements.txt +4 -0
- shared/components.py +375 -0
- shared/styles.css +425 -0
- shared/utils.py +366 -0
README.md
CHANGED
|
@@ -1,12 +1,61 @@
|
|
| 1 |
---
|
| 2 |
title: Receipt Scanner
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.13.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Receipt Scanner
|
| 3 |
+
emoji: 🧾
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
|
|
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Receipt Scanner
|
| 13 |
+
|
| 14 |
+
## Question
|
| 15 |
+
|
| 16 |
+
How do we turn a document image into structured data a program can use?
|
| 17 |
+
|
| 18 |
+
## System Boundary
|
| 19 |
+
|
| 20 |
+
This Space treats receipt understanding as a multimodal extraction problem: image in, schema out.
|
| 21 |
+
|
| 22 |
+
## Method
|
| 23 |
+
|
| 24 |
+
A vision-language model reads the uploaded receipt and produces structured fields such as merchant, date, item rows, subtotal, tax, total, and payment details. The app parses the model output into table and JSON views.
|
| 25 |
+
|
| 26 |
+
## Technique
|
| 27 |
+
|
| 28 |
+
This is multimodal information extraction. The model must read pixels, infer document layout, identify fields, and emit a schema that downstream software can consume.
|
| 29 |
+
|
| 30 |
+
The difficult part is not only recognizing text. The difficult part is assigning text to the correct semantic field: item, price, tax, total, date, or merchant.
|
| 31 |
+
|
| 32 |
+
## Output
|
| 33 |
+
|
| 34 |
+
The app returns a summary, an item table, raw structured JSON, and exportable records.
|
| 35 |
+
|
| 36 |
+
## Why It Matters
|
| 37 |
+
|
| 38 |
+
The useful part of document AI is not OCR alone. The useful part is converting messy visual evidence into validated fields that can enter a database, review queue, or accounting workflow.
|
| 39 |
+
|
| 40 |
+
## What To Notice
|
| 41 |
+
|
| 42 |
+
Check whether totals reconcile with item rows and whether the model preserves uncertainty. Structured extraction should be judged at the field level, not only by a nice-looking summary.
|
| 43 |
+
|
| 44 |
+
## Effect In Practice
|
| 45 |
+
|
| 46 |
+
Receipt extraction is a small version of a larger document-understanding pattern used for invoices, insurance forms, procurement, and expense workflows.
|
| 47 |
+
|
| 48 |
+
## Hugging Face Extension
|
| 49 |
+
|
| 50 |
+
The Space can be extended with a receipt dataset, field-level accuracy metrics, and model comparisons across open vision-language models.
|
| 51 |
+
|
| 52 |
+
## Limitations
|
| 53 |
+
|
| 54 |
+
Receipt formats vary widely. A production system should add confidence estimates, field-level validation, human review, and evaluation on a labeled receipt dataset.
|
| 55 |
+
|
| 56 |
+
## Run Locally
|
| 57 |
+
|
| 58 |
+
```bash
|
| 59 |
+
pip install -r requirements.txt
|
| 60 |
+
python app.py
|
| 61 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Receipt Scanner
|
| 3 |
+
Upload a receipt photo and extract structured data (items, prices, totals).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from huggingface_hub import InferenceClient
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import base64
|
| 10 |
+
from io import BytesIO
|
| 11 |
+
import json
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import re
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
| 18 |
+
from shared.components import create_method_panel, create_premium_hero
|
| 19 |
+
|
| 20 |
+
# Initialize Hugging Face Inference Client
|
| 21 |
+
client = InferenceClient()
|
| 22 |
+
|
| 23 |
+
EXTRACTION_PROMPT = """Analyze this receipt image and extract ALL information in a structured format.
|
| 24 |
+
|
| 25 |
+
Extract:
|
| 26 |
+
1. **Merchant/Store Name**
|
| 27 |
+
2. **Date** (in YYYY-MM-DD format if possible)
|
| 28 |
+
3. **Time** (if visible)
|
| 29 |
+
4. **Items** - List each item with its price
|
| 30 |
+
5. **Subtotal** (if shown)
|
| 31 |
+
6. **Tax** (if shown)
|
| 32 |
+
7. **Total Amount**
|
| 33 |
+
8. **Payment Method** (if visible)
|
| 34 |
+
|
| 35 |
+
Format your response EXACTLY as JSON:
|
| 36 |
+
```json
|
| 37 |
+
{
|
| 38 |
+
"merchant": "Store Name",
|
| 39 |
+
"date": "YYYY-MM-DD",
|
| 40 |
+
"time": "HH:MM",
|
| 41 |
+
"items": [
|
| 42 |
+
{"name": "Item 1", "price": 0.00},
|
| 43 |
+
{"name": "Item 2", "price": 0.00}
|
| 44 |
+
],
|
| 45 |
+
"subtotal": 0.00,
|
| 46 |
+
"tax": 0.00,
|
| 47 |
+
"total": 0.00,
|
| 48 |
+
"payment_method": "Card/Cash/etc"
|
| 49 |
+
}
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Be precise with numbers. If something is unclear, use null."""
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def extract_json_from_text(text):
    """Return the first JSON object embedded in ``text`` as a string.

    Lookup order:

    1. An object wrapped in a Markdown code fence (with or without the
       ``json`` language tag).
    2. The object that starts at the first ``{``, delimited by actually
       parsing it, so prose or stray braces after the object are ignored.
    3. As a last resort, everything from the first ``{`` to the last ``}``.
       This may not be valid JSON; the caller is expected to handle the
       resulting ``json.JSONDecodeError``.

    Args:
        text: Raw model output. ``None`` and empty strings are accepted.

    Returns:
        The JSON object text, or ``None`` when ``text`` contains no
        brace-delimited span.
    """
    if not text:
        return None

    # Prefer a fenced block: that is the format the prompt asks the model for.
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        return fenced.group(1)

    # Let the JSON parser find where the first object really ends instead of
    # greedily matching up to the last "}" anywhere in the reply.
    start = text.find('{')
    if start != -1:
        try:
            _, end = json.JSONDecoder().raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            return text[start:end]

    # Fallback keeps the historical behaviour for malformed output.
    loose = re.search(r'\{.*\}', text, re.DOTALL)
    return loose.group(0) if loose else None
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _format_money(value):
    """Format a model-supplied amount as ``$0.00``; ``N/A`` when it is not a usable number."""
    if value is None or isinstance(value, bool):
        return "N/A"
    try:
        amount = float(value)
    except (TypeError, ValueError):
        return "N/A"
    if amount != amount:  # NaN, e.g. what pandas stores for a null price
        return "N/A"
    return f"${amount:.2f}"


def _encode_png_base64(image):
    """Serialize a PIL image (or an image file path) to base64-encoded PNG text."""
    buffer = BytesIO()
    if isinstance(image, str):
        # The context manager releases the file handle opened by Image.open.
        with Image.open(image) as opened:
            opened.save(buffer, format="PNG")
    else:
        image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode()


def _empty_items_table():
    """Return an empty items table with the columns the UI expects."""
    return pd.DataFrame(columns=['name', 'price'])


def scan_receipt(image):
    """Extract structured data from a receipt image using a vision-language model.

    Args:
        image: A PIL image, a path to an image file, or ``None``.

    Returns:
        A 3-tuple ``(summary, table, json_text)``:

        * ``summary`` - Markdown report, or an error/warning message.
        * ``table`` - ``pandas.DataFrame`` of the extracted items (empty, with
          ``name``/``price`` columns, when nothing was extracted or on error).
        * ``json_text`` - pretty-printed JSON of the parsed data, or ``""``
          on error.

    The function never raises: every failure is reported through ``summary``.
    Fields the model returns as ``null`` or as non-numeric text are shown as
    ``N/A`` instead of aborting the whole scan.
    """
    if image is None:
        return "❌ Please upload a receipt first!", _empty_items_table(), ""

    # Bound up front so the JSONDecodeError handler can always reference it,
    # even if the error is raised before the model reply has been read.
    raw_response = ""
    try:
        img_str = _encode_png_base64(image)

        # Qwen2-VL handles OCR and layout understanding in a single chat call.
        response = client.chat_completion(
            model="Qwen/Qwen2-VL-7B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
                        {"type": "text", "text": EXTRACTION_PROMPT}
                    ]
                }
            ],
            max_tokens=1000,
            temperature=0.1  # Low temperature for accuracy
        )

        raw_response = response.choices[0].message.content or ""

        # Extract JSON from response
        json_str = extract_json_from_text(raw_response)
        if not json_str:
            return (
                f"⚠️ Could not parse receipt data.\n\nRaw response:\n{raw_response}",
                _empty_items_table(),
                "",
            )

        data = json.loads(json_str)

        # Keep only well-formed item rows; the model occasionally emits
        # something other than a list of objects here.
        items = data.get('items')
        if not isinstance(items, list):
            items = []
        items = [item for item in items if isinstance(item, dict)]

        lines = [
            "# 🧾 Receipt Analysis",
            "",
            f"**Merchant**: {data.get('merchant') or 'N/A'}",
            f"**Date**: {data.get('date') or 'N/A'}",
            f"**Time**: {data.get('time') or 'N/A'}",
            "",
            "---",
            "",
            "## 📦 Items",
            "",
        ]
        if items:
            lines.extend(
                f"- **{item.get('name', 'Unknown')}**: {_format_money(item.get('price', 0.0))}"
                for item in items
            )
        else:
            lines.append("*No items found*")
        lines.extend([
            "",
            "---",
            "",
            "## 💰 Totals",
            "",
            f"- **Subtotal**: {_format_money(data.get('subtotal', 0.0))}",
            f"- **Tax**: {_format_money(data.get('tax', 0.0))}",
            f"- **Total**: {_format_money(data.get('total', 0.0))}",
            "",
            f"**Payment**: {data.get('payment_method') or 'N/A'}",
            "",
        ])
        summary = "\n".join(lines)

        # Table view of the same items.
        if items:
            df = pd.DataFrame(items)
            if 'price' in df.columns:
                df['price'] = df['price'].apply(_format_money)
            else:
                df['price'] = "N/A"
        else:
            df = _empty_items_table()

        # Format JSON for download
        json_output = json.dumps(data, indent=2)

        return summary, df, json_output

    except json.JSONDecodeError as e:
        return (
            f"❌ Error parsing JSON: {str(e)}\n\nRaw response:\n{raw_response}",
            _empty_items_table(),
            "",
        )
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"❌ Error scanning receipt: {str(e)}", _empty_items_table(), ""
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# Gradio Interface
|
| 161 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 162 |
+
create_premium_hero(
|
| 163 |
+
"Receipt Scanner",
|
| 164 |
+
"Extract merchant, items, totals, and payment details from receipt images with a vision-language model workflow.",
|
| 165 |
+
"🧾",
|
| 166 |
+
badge="Document Vision",
|
| 167 |
+
highlights=["Vision-language extraction", "Structured JSON", "CSV export"],
|
| 168 |
+
)
|
| 169 |
+
create_method_panel({
|
| 170 |
+
"Technique": "Image-to-structured-data extraction with schema parsing and tabular validation.",
|
| 171 |
+
"What it proves": "You can turn multimodal model output into reliable downstream data products.",
|
| 172 |
+
"HF capability": "Designed for Hub-hosted VLM inference and lightweight Space deployment.",
|
| 173 |
+
})
|
| 174 |
+
|
| 175 |
+
with gr.Row():
|
| 176 |
+
with gr.Column(scale=1):
|
| 177 |
+
image_input = gr.Image(
|
| 178 |
+
label="📸 Upload Receipt Photo",
|
| 179 |
+
type="pil",
|
| 180 |
+
height=400
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
scan_btn = gr.Button("🔍 Scan Receipt", variant="primary", size="lg")
|
| 184 |
+
|
| 185 |
+
gr.Markdown("""
|
| 186 |
+
### 💡 Tips for Best Results:
|
| 187 |
+
- Good lighting, minimal shadows
|
| 188 |
+
- Receipt should be flat and clear
|
| 189 |
+
- Include the entire receipt
|
| 190 |
+
- High contrast works best
|
| 191 |
+
""")
|
| 192 |
+
|
| 193 |
+
with gr.Column(scale=1):
|
| 194 |
+
summary_output = gr.Markdown(label="📊 Summary")
|
| 195 |
+
|
| 196 |
+
with gr.Row():
|
| 197 |
+
with gr.Column():
|
| 198 |
+
table_output = gr.Dataframe(
|
| 199 |
+
label="📋 Items Table",
|
| 200 |
+
headers=["name", "price"],
|
| 201 |
+
interactive=False
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
with gr.Column():
|
| 205 |
+
json_output = gr.Textbox(
|
| 206 |
+
label="📄 JSON Data (copy to download)",
|
| 207 |
+
lines=15,
|
| 208 |
+
max_lines=20
|
| 209 |
+
)
|
| 210 |
+
|
| 211 |
+
# Event handler
|
| 212 |
+
scan_btn.click(
|
| 213 |
+
fn=scan_receipt,
|
| 214 |
+
inputs=[image_input],
|
| 215 |
+
outputs=[summary_output, table_output, json_output],
|
| 216 |
+
api_name="scan"
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
gr.Markdown("""
|
| 220 |
+
---
|
| 221 |
+
### 🎓 What This App Does:
|
| 222 |
+
|
| 223 |
+
1. **OCR + Understanding**: Doesn't just read text, understands structure
|
| 224 |
+
2. **Data Extraction**: Identifies items, prices, totals, dates
|
| 225 |
+
3. **JSON Export**: Download structured data for expense tracking
|
| 226 |
+
4. **Table View**: See items in an organized format
|
| 227 |
+
|
| 228 |
+
### 📊 Use Cases:
|
| 229 |
+
|
| 230 |
+
- **Expense Tracking**: Digitize receipts for accounting
|
| 231 |
+
- **Budget Apps**: Auto-import spending data
|
| 232 |
+
- **Tax Records**: Organize business expenses
|
| 233 |
+
- **Reimbursements**: Submit itemized claims
|
| 234 |
+
- **Personal Finance**: Track spending categories
|
| 235 |
+
|
| 236 |
+
*Note: Accuracy depends on receipt clarity and format. Complex layouts may require manual verification.*
|
| 237 |
+
""")
|
| 238 |
+
|
| 239 |
+
if __name__ == "__main__":
|
| 240 |
+
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
+
huggingface-hub==0.25.0
|
| 3 |
+
Pillow==11.0.0
|
| 4 |
+
pandas==2.2.3
|
shared/components.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HF-Master Shared Components
|
| 3 |
+
Reusable Gradio components for all projects
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import html
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SharedComponents:
    """Shared UI components for all HF-Master projects.

    Every member is a static method, so the class serves as a namespace of
    factory helpers. Helpers that emit raw HTML escape their text arguments
    with ``_esc``; the Markdown helpers interpolate their arguments as-is.
    """

    @staticmethod
    def _esc(value: Any) -> str:
        """Escape text before placing it inside shared HTML components."""
        return html.escape(str(value), quote=True)

    @staticmethod
    def _style_tag() -> str:
        """Inline the shared stylesheet for Gradio Spaces that do not pass css=.

        Returns:
            ``styles.css`` (located next to this module) wrapped in a
            ``<style>`` tag, or ``""`` when the file cannot be read.
        """
        style_path = Path(__file__).with_name("styles.css")
        try:
            return f"<style>{style_path.read_text(encoding='utf-8')}</style>"
        except OSError:
            return ""

    @staticmethod
    def create_header(title: str, description: str, emoji: str = "🚀") -> gr.HTML:
        """Create the standardized project header.

        Delegates to ``create_premium_hero`` with a fixed badge and highlight
        chips, so the result is a ``gr.HTML`` component.
        """
        return SharedComponents.create_premium_hero(
            title=title,
            description=description,
            emoji=emoji,
            badge="AI/ML Space",
            highlights=["Interactive demo", "Explainable workflow", "HF-ready"],
        )

    @staticmethod
    def create_footer(version: str = "1.0.0") -> gr.Markdown:
        """Create the standardized project footer showing ``version`` (HTML-escaped)."""
        return gr.Markdown(f"""
        <div class="hf-footer">
        <strong>{SharedComponents._esc(version)}</strong> · Built as a practical AI/ML learning Space for the Hugging Face community.
        </div>
        """)

    @staticmethod
    def create_premium_hero(
        title: str,
        description: str,
        emoji: str = "🚀",
        badge: str = "Featured Space",
        highlights: Optional[List[str]] = None,
    ) -> gr.HTML:
        """Create a richer landing-page hero for Spaces.

        Args:
            title: Heading text.
            description: Paragraph shown under the heading.
            emoji: Icon shown next to the copy.
            badge: Short label rendered above the heading.
            highlights: Optional chip labels rendered below the hero copy.

        Returns:
            A ``gr.HTML`` component containing the shared stylesheet followed
            by the hero markup. All text arguments are HTML-escaped.
        """
        highlights = highlights or []
        esc = SharedComponents._esc
        chips = "".join(
            f"""
            <span class="hf-chip">{esc(item)}</span>
            """
            for item in highlights
        )

        return gr.HTML(f"""
        {SharedComponents._style_tag()}
        <div class="hf-hero">
        <div class="hf-hero-grid">
        <div class="hf-hero-copy">
        <div class="hf-icon">{esc(emoji)}</div>
        <div>
        <div class="hf-badge">{esc(badge)}</div>
        <h1>{esc(title)}</h1>
        <p>{esc(description)}</p>
        </div>
        </div>
        </div>
        <div class="hf-chip-row">
        {chips}
        </div>
        </div>
        """)

    @staticmethod
    def create_method_panel(items: Dict[str, str]) -> gr.HTML:
        """Create a compact method/pipeline explainer panel.

        Args:
            items: Mapping of card label to card text; one card is rendered
                per entry, in mapping order. Both parts are HTML-escaped.
        """
        esc = SharedComponents._esc
        cards = "".join(
            f"""
            <div class="hf-method-card">
            <span>{esc(label)}</span>
            <p>{esc(text)}</p>
            </div>
            """
            for label, text in items.items()
        )
        return gr.HTML(f"""{SharedComponents._style_tag()}<div class="hf-method-grid">{cards}</div>""")

    @staticmethod
    def create_status_badge(status: str) -> str:
        """Return the emoji for ``status`` (case-insensitive); ⚪ for anything unrecognized."""
        colors = {
            "complete": "🟢",
            "in-progress": "🟡",
            "planned": "⚪",
            "experimental": "🔴",
        }
        # str() keeps a None or non-string status from raising AttributeError.
        return colors.get(str(status).lower(), "⚪")

    @staticmethod
    def create_project_card(
        title: str,
        description: str,
        tech_stack: List[str],
        difficulty: str,
        viral_potential: str
    ) -> str:
        """Create a Markdown project card.

        Returns:
            Markdown text with the title as a level-2 heading, the
            description, the tech stack as inline-code badges, and a
            difficulty / viral-potential line.
        """
        tech_badges = " ".join([f"`{t}`" for t in tech_stack])

        return (
            f"\n## {title}\n"
            f"\n{description}\n"
            f"\n**Tech Stack:** {tech_badges}\n"
            f"\n**Difficulty:** {difficulty} | **Viral Potential:** {viral_potential}\n"
        )

    @staticmethod
    def create_risk_chart(risk_factors: Dict[str, float]) -> Any:
        """Create a horizontal bar chart of risk factors.

        Args:
            risk_factors: Mapping of factor name to score; each score is
                multiplied by 100 for display, and the colour scale is fixed
                to the 0-100 range.

        Returns:
            A Plotly ``Figure``.
        """
        # Imported lazily so plotly is only needed by Spaces that draw charts.
        import plotly.graph_objects as go

        factors = list(risk_factors.keys())
        scores = [risk_factors[f] * 100 for f in factors]

        fig = go.Figure(data=[
            go.Bar(
                x=scores,
                y=[f.replace('_', ' ').title() for f in factors],
                orientation='h',
                marker=dict(
                    color=scores,
                    colorscale='RdYlGn_r',
                    cmin=0,
                    cmax=100
                )
            )
        ])

        fig.update_layout(
            title="Risk Factor Breakdown",
            xaxis_title="Risk Score",
            yaxis_title="Factor",
            height=400,
            template="plotly_white"
        )

        return fig

    @staticmethod
    def create_comparison_chart(items: List[Dict], keys: List[str]) -> Any:
        """Create a grouped bar chart comparing ``items`` across ``keys``.

        Each item contributes one trace, labelled by its ``name`` entry (or
        ``Item N`` when absent). Keys missing from an item are plotted as 0.
        """
        # Imported lazily so plotly is only needed by Spaces that draw charts.
        import plotly.graph_objects as go

        fig = go.Figure()

        for i, item in enumerate(items):
            fig.add_trace(go.Bar(
                name=item.get('name', f'Item {i+1}'),
                x=keys,
                y=[item.get(k, 0) for k in keys]
            ))

        fig.update_layout(
            barmode='group',
            height=400
        )

        return fig

    @staticmethod
    def create_metric_card(label: str, value: str, emoji: str = "📊") -> gr.Markdown:
        """Create a metric display card: a heading with ``emoji`` and ``label``, then ``value`` in bold."""
        return gr.Markdown(f"""
        ### {emoji} {label}

        **{value}**
        """)

    @staticmethod
    def create_error_display(error: str) -> gr.Markdown:
        """Create an error message display."""
        return gr.Markdown(f"""
        ❌ **Error**

        {error}
        """)

    @staticmethod
    def create_success_display(message: str) -> gr.Markdown:
        """Create a success message display."""
        return gr.Markdown(f"""
        ✅ **Success**

        {message}
        """)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
class LoadingSpinner:
    """Markdown placeholders shown while work is in progress."""

    @staticmethod
    def create_spinner(message: str = "Loading...") -> gr.Markdown:
        """Build a Markdown block with an hourglass, the bold ``message`` and a wait hint."""
        spinner_text = f"""
        ⏳ **{message}**

        _This may take a moment..._
        """
        return gr.Markdown(spinner_text)

    @staticmethod
    def create_progress_bar(initial: float = 0) -> gr.Markdown:
        """Build a Markdown block with a fixed empty bar followed by ``initial`` as a percentage."""
        bar_text = f"""
        ░░░░░░░░░ **{initial}%**
        """
        return gr.Markdown(bar_text)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
class TableFormatter:
    """Format data as header-plus-rows tables (lists of lists)."""

    @staticmethod
    def format_dict_table(data: Dict[str, Any], headers: Optional[List[str]] = None) -> List:
        """Format a dictionary as table rows.

        Args:
            data: Mapping to render; one row per entry, in mapping order.
            headers: Header row; defaults to ``["Key", "Value"]`` when
                omitted or empty.

        Returns:
            ``[headers, [key, str(value)], ...]``.
        """
        header_row = headers or ["Key", "Value"]
        return [header_row] + [[key, str(value)] for key, value in data.items()]

    @staticmethod
    def create_dataframe(data: List[Dict], columns: Optional[List[str]] = None) -> List:
        """Create a dataframe-compatible data structure.

        Args:
            data: Records to render, one dict per row.
            columns: Column names to emit, in order; defaults to the keys of
                the first record when omitted or empty.

        Returns:
            ``[]`` for empty ``data``; otherwise ``[headers, row, ...]`` where
            a column missing from a record is filled with ``""``.
        """
        if not data:
            return []

        # data is non-empty here, so the first record can supply the columns.
        headers = columns or list(data[0].keys())
        return [headers] + [[row.get(h, "") for h in headers] for row in data]
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
class CodeHighlighter:
    """Factories for Gradio code-display widgets."""

    @staticmethod
    def create_code_display(code: str, language: str = "python") -> gr.Code:
        """Build a 20-line ``gr.Code`` block showing ``code`` highlighted as ``language``."""
        display = gr.Code(value=code, language=language, lines=20)
        return display

    @staticmethod
    def create_copy_button(code: str) -> gr.Button:
        """Build the "Copy Code" button.

        NOTE(review): ``code`` is not used and no click handler is attached
        here; the caller must wire up the actual copy action.
        """
        button = gr.Button("📋 Copy Code")
        return button

    @staticmethod
    def create_diff_view(old_code: str, new_code: str) -> Tuple[gr.Code, gr.Code]:
        """Build a "Before"/"After" pair of 15-line Python code blocks."""
        before = gr.Code(value=old_code, language="python", lines=15, label="Before")
        after = gr.Code(value=new_code, language="python", lines=15, label="After")
        return before, after
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def create_header(title: str, description: str, emoji: str = "🚀") -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_header``.

    The header is built by the premium hero helper, so the returned
    component is a ``gr.HTML`` (not a ``gr.Markdown``).
    """
    return SharedComponents.create_header(title, description, emoji)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def create_footer(version: str = "1.0.0") -> gr.Markdown:
    """Module-level shortcut for ``SharedComponents.create_footer``."""
    footer = SharedComponents.create_footer(version)
    return footer
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def create_premium_hero(
    title: str,
    description: str,
    emoji: str = "🚀",
    badge: str = "Featured Space",
    highlights: Optional[List[str]] = None,
) -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_premium_hero``."""
    hero = SharedComponents.create_premium_hero(
        title,
        description,
        emoji,
        badge,
        highlights,
    )
    return hero
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def create_method_panel(items: Dict[str, str]) -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_method_panel``."""
    panel = SharedComponents.create_method_panel(items)
    return panel
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
class ProgressTracker:
    """Track progress through an ordered list of named steps.

    ``current`` is the index of the step in progress; it equals
    ``len(steps)`` once every step has been completed.
    """

    def __init__(self, steps: List[str]):
        self.steps = steps
        self.current = 0

    def get_status(self) -> str:
        """Render the progress as Markdown.

        Each completed step is prefixed with ✅, the step in progress with 🔄
        and each pending step with ⬜. Sections with no steps render as empty
        lines.
        """
        # Prefix every completed step (not just the first) and emit nothing,
        # rather than a lone check mark, when no step is complete yet.
        completed = "\n".join(f"✅ {s}" for s in self.steps[:self.current])
        current = f"🔄 {self.steps[self.current]}" if self.current < len(self.steps) else ""
        remaining = "\n".join(f"⬜ {s}" for s in self.steps[self.current + 1:])

        return f"\n## Progress\n\n{completed}\n{current}\n{remaining}\n"

    def advance(self) -> bool:
        """Mark the current step complete.

        Returns:
            ``True`` if a step was completed, ``False`` if all steps were
            already complete.
        """
        if self.current < len(self.steps):
            self.current += 1
            return True
        return False

    def reset(self):
        """Return to the first step."""
        self.current = 0
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def create_tabbed_interface(tabs: Dict[str, Any]) -> gr.Blocks:
    """Create a ``gr.Blocks`` app with one tab per entry of ``tabs``.

    Args:
        tabs: Mapping of tab title to tab content. Content may be a Gradio
            component/layout created outside a Blocks context (it is placed
            in the tab via its ``render()`` method) or a zero-argument
            callable that builds the tab's components when called. Any other
            value leaves the tab empty.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    with gr.Blocks() as demo:
        with gr.Tabs():
            for tab_name, tab_content in tabs.items():
                with gr.Tab(tab_name):
                    # A bare reference to the content would be a no-op; it has
                    # to be rendered (or built) inside the tab's context.
                    if hasattr(tab_content, "render"):
                        tab_content.render()
                    elif callable(tab_content):
                        tab_content()

    return demo
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def create_side_by_side(left_content: Any, right_content: Any) -> Tuple[Any, Any]:
    """Place two pieces of content in adjacent columns of a new row.

    Must be called inside an active ``gr.Blocks`` context.

    Args:
        left_content: Content for the left column. Either a Gradio
            component/layout created outside a Blocks context (placed via its
            ``render()`` method) or a zero-argument callable that builds the
            column's components. Any other value leaves the column empty.
        right_content: Content for the right column; same rules.

    Returns:
        ``(left_content, right_content)``, i.e. the objects passed in.
    """
    with gr.Row():
        for content in (left_content, right_content):
            with gr.Column():
                # A bare reference to the content would be a no-op; it has to
                # be rendered (or built) inside the column's context.
                if hasattr(content, "render"):
                    content.render()
                elif callable(content):
                    content()

    return left_content, right_content
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def create_accordion(items: List[Tuple[str, Any]]) -> gr.Accordion:
    """Create a single "Click to expand" accordion holding titled sections.

    Must be called inside an active ``gr.Blocks`` context.

    Args:
        items: ``(title, content)`` pairs. Each title becomes a level-3
            Markdown heading. Content may be a Gradio component/layout
            created outside a Blocks context (placed via its ``render()``
            method) or a zero-argument callable that builds the section's
            components. Any other value adds the heading only.

    Returns:
        The ``gr.Accordion`` containing all sections.
    """
    with gr.Accordion("Click to expand") as accordion:
        for title, content in items:
            gr.Markdown(f"### {title}")
            # A bare reference to the content would be a no-op; it has to be
            # rendered (or built) inside the accordion's context.
            if hasattr(content, "render"):
                content.render()
            elif callable(content):
                content()

    return accordion
|
shared/styles.css
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* HF-Master Shared Styles
|
| 2 |
+
Light research-studio system inspired by MCP Video Localizer. */
|
| 3 |
+
|
| 4 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
|
| 5 |
+
|
| 6 |
+
:root {
|
| 7 |
+
--peach: #ffad7a;
|
| 8 |
+
--peach-dark: #e8935c;
|
| 9 |
+
--lavender: #b8a9d9;
|
| 10 |
+
--sky-blue: #7accff;
|
| 11 |
+
--ink: #1f2937;
|
| 12 |
+
--slate: #4b5563;
|
| 13 |
+
--muted: #6b7280;
|
| 14 |
+
--bg-light: #f9fafb;
|
| 15 |
+
--surface: #ffffff;
|
| 16 |
+
--surface-soft: #fff7f1;
|
| 17 |
+
--border-default: #e5e7eb;
|
| 18 |
+
--border-subtle: #f3f4f6;
|
| 19 |
+
--accent-subtle: rgba(255, 173, 122, 0.14);
|
| 20 |
+
--shadow-sm: 0 1px 2px rgba(31, 41, 55, 0.05);
|
| 21 |
+
--shadow-md: 0 8px 24px rgba(31, 41, 55, 0.08);
|
| 22 |
+
--shadow-lg: 0 18px 48px rgba(31, 41, 55, 0.12);
|
| 23 |
+
--radius: 8px;
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
html {
|
| 27 |
+
scroll-behavior: smooth;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
body,
|
| 31 |
+
.gradio-container {
|
| 32 |
+
background:
|
| 33 |
+
radial-gradient(circle at 7% 4%, rgba(255, 173, 122, 0.22), transparent 28%),
|
| 34 |
+
radial-gradient(circle at 88% 8%, rgba(122, 204, 255, 0.20), transparent 30%),
|
| 35 |
+
linear-gradient(180deg, #ffffff 0%, var(--bg-light) 44%, #f7f2fb 100%) !important;
|
| 36 |
+
color: var(--ink) !important;
|
| 37 |
+
font-family: Inter, "Helvetica Neue", "Segoe UI", system-ui, -apple-system, sans-serif !important;
|
| 38 |
+
font-weight: 400;
|
| 39 |
+
letter-spacing: 0;
|
| 40 |
+
-webkit-font-smoothing: antialiased;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.gradio-container {
|
| 44 |
+
max-width: 1180px !important;
|
| 45 |
+
margin: 0 auto !important;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.main,
|
| 49 |
+
.block-container {
|
| 50 |
+
background: transparent !important;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
.block-container {
|
| 54 |
+
max-width: 1180px;
|
| 55 |
+
padding-top: 2rem;
|
| 56 |
+
padding-bottom: 2rem;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
/* Shared hero used by Gradio helpers and Streamlit HTML. */
|
| 60 |
+
.hf-hero,
|
| 61 |
+
.hero {
|
| 62 |
+
position: relative;
|
| 63 |
+
overflow: hidden;
|
| 64 |
+
background:
|
| 65 |
+
radial-gradient(circle at 20% 8%, rgba(255, 255, 255, 0.52), transparent 26%),
|
| 66 |
+
linear-gradient(135deg, var(--peach) 0%, var(--lavender) 54%, var(--sky-blue) 100%);
|
| 67 |
+
border: 1px solid rgba(255, 255, 255, 0.72);
|
| 68 |
+
border-radius: 16px;
|
| 69 |
+
box-shadow: var(--shadow-lg), 0 0 34px rgba(255, 173, 122, 0.18);
|
| 70 |
+
color: #ffffff;
|
| 71 |
+
margin: 1rem 0 1.25rem 0;
|
| 72 |
+
padding: clamp(1.25rem, 3vw, 2.25rem);
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
.hf-hero::after,
|
| 76 |
+
.hero::after {
|
| 77 |
+
content: "";
|
| 78 |
+
position: absolute;
|
| 79 |
+
inset: auto -12% -45% auto;
|
| 80 |
+
width: 360px;
|
| 81 |
+
height: 360px;
|
| 82 |
+
background: rgba(255, 255, 255, 0.22);
|
| 83 |
+
border-radius: 999px;
|
| 84 |
+
pointer-events: none;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.hf-hero-grid,
|
| 88 |
+
.hf-hero-copy {
|
| 89 |
+
position: relative;
|
| 90 |
+
z-index: 1;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.hf-hero-copy {
|
| 94 |
+
display: flex;
|
| 95 |
+
align-items: flex-start;
|
| 96 |
+
gap: 1rem;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
.hf-icon {
|
| 100 |
+
align-items: center;
|
| 101 |
+
background: rgba(255, 255, 255, 0.24);
|
| 102 |
+
border: 1px solid rgba(255, 255, 255, 0.45);
|
| 103 |
+
border-radius: 8px;
|
| 104 |
+
box-shadow: var(--shadow-sm);
|
| 105 |
+
display: inline-flex;
|
| 106 |
+
flex: 0 0 auto;
|
| 107 |
+
font-size: 1.7rem;
|
| 108 |
+
height: 3.75rem;
|
| 109 |
+
justify-content: center;
|
| 110 |
+
width: 3.75rem;
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.hf-badge {
|
| 114 |
+
background: rgba(255, 255, 255, 0.22);
|
| 115 |
+
border: 1px solid rgba(255, 255, 255, 0.42);
|
| 116 |
+
border-radius: 999px;
|
| 117 |
+
color: rgba(255, 255, 255, 0.96);
|
| 118 |
+
display: inline-flex;
|
| 119 |
+
font-size: 0.76rem;
|
| 120 |
+
font-weight: 800;
|
| 121 |
+
letter-spacing: 0.06em;
|
| 122 |
+
margin-bottom: 0.7rem;
|
| 123 |
+
padding: 0.34rem 0.7rem;
|
| 124 |
+
text-transform: uppercase;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
.hf-hero h1,
|
| 128 |
+
.hero h1 {
|
| 129 |
+
color: #ffffff !important;
|
| 130 |
+
font-size: clamp(2rem, 4vw, 3.35rem);
|
| 131 |
+
font-weight: 800;
|
| 132 |
+
letter-spacing: 0;
|
| 133 |
+
line-height: 1.04;
|
| 134 |
+
margin: 0 0 0.45rem 0;
|
| 135 |
+
text-shadow: 0 2px 12px rgba(31, 41, 55, 0.18);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.hf-hero p,
|
| 139 |
+
.hero p {
|
| 140 |
+
color: rgba(255, 255, 255, 0.96) !important;
|
| 141 |
+
font-size: 1.03rem;
|
| 142 |
+
line-height: 1.65;
|
| 143 |
+
margin: 0;
|
| 144 |
+
max-width: 68ch;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
.hf-chip-row,
|
| 148 |
+
.pill-row {
|
| 149 |
+
display: flex;
|
| 150 |
+
flex-wrap: wrap;
|
| 151 |
+
gap: 0.55rem;
|
| 152 |
+
margin-top: 1.15rem;
|
| 153 |
+
position: relative;
|
| 154 |
+
z-index: 1;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.hf-chip,
|
| 158 |
+
.badge,
|
| 159 |
+
.tech-tag {
|
| 160 |
+
background: rgba(255, 255, 255, 0.24);
|
| 161 |
+
border: 1px solid rgba(255, 255, 255, 0.45);
|
| 162 |
+
border-radius: 999px;
|
| 163 |
+
color: #ffffff;
|
| 164 |
+
display: inline-flex;
|
| 165 |
+
font-size: 0.86rem;
|
| 166 |
+
font-weight: 700;
|
| 167 |
+
padding: 0.42rem 0.75rem;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.hf-method-grid {
|
| 171 |
+
display: grid;
|
| 172 |
+
gap: 0.9rem;
|
| 173 |
+
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
| 174 |
+
margin: 1rem 0;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
.hf-method-card,
|
| 178 |
+
.glass-card,
|
| 179 |
+
.project-card,
|
| 180 |
+
.info-card,
|
| 181 |
+
.metric-card,
|
| 182 |
+
.stat-box,
|
| 183 |
+
.gradio-container .form,
|
| 184 |
+
.gradio-container .panel {
|
| 185 |
+
background: rgba(255, 255, 255, 0.88) !important;
|
| 186 |
+
border: 1px solid var(--border-default) !important;
|
| 187 |
+
border-radius: var(--radius) !important;
|
| 188 |
+
box-shadow: var(--shadow-md) !important;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
.hf-method-card {
|
| 192 |
+
padding: 1rem;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.hf-method-card span {
|
| 196 |
+
color: var(--peach-dark);
|
| 197 |
+
display: block;
|
| 198 |
+
font-size: 0.78rem;
|
| 199 |
+
font-weight: 800;
|
| 200 |
+
letter-spacing: 0.04em;
|
| 201 |
+
margin-bottom: 0.35rem;
|
| 202 |
+
text-transform: uppercase;
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
.hf-method-card p {
|
| 206 |
+
color: var(--slate);
|
| 207 |
+
line-height: 1.55;
|
| 208 |
+
margin: 0;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.project-card,
|
| 212 |
+
.info-card,
|
| 213 |
+
.metric-card,
|
| 214 |
+
.stat-box {
|
| 215 |
+
padding: 1rem;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
h1,
|
| 219 |
+
h2,
|
| 220 |
+
h3,
|
| 221 |
+
h4,
|
| 222 |
+
.markdown-text h1,
|
| 223 |
+
.markdown-text h2,
|
| 224 |
+
.markdown-text h3 {
|
| 225 |
+
color: var(--ink) !important;
|
| 226 |
+
font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
|
| 227 |
+
letter-spacing: 0;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
p,
|
| 231 |
+
li,
|
| 232 |
+
label,
|
| 233 |
+
.markdown-text,
|
| 234 |
+
.markdown-text p,
|
| 235 |
+
.markdown-text span {
|
| 236 |
+
color: var(--slate) !important;
|
| 237 |
+
font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
strong {
|
| 241 |
+
color: var(--ink);
|
| 242 |
+
font-weight: 700;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
input,
|
| 246 |
+
select,
|
| 247 |
+
textarea,
|
| 248 |
+
.gr-textbox,
|
| 249 |
+
.gr-dropdown {
|
| 250 |
+
background: #ffffff !important;
|
| 251 |
+
border: 1px solid var(--border-default) !important;
|
| 252 |
+
border-radius: var(--radius) !important;
|
| 253 |
+
color: var(--ink) !important;
|
| 254 |
+
font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
|
| 255 |
+
transition: border-color 0.15s ease, box-shadow 0.15s ease !important;
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
input:focus,
|
| 259 |
+
select:focus,
|
| 260 |
+
textarea:focus,
|
| 261 |
+
.gr-textbox:focus {
|
| 262 |
+
border-color: var(--peach) !important;
|
| 263 |
+
box-shadow: 0 0 0 3px var(--accent-subtle) !important;
|
| 264 |
+
outline: none !important;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
button.primary,
|
| 268 |
+
button[class*="primary"],
|
| 269 |
+
div[data-testid="stButton"] > button {
|
| 270 |
+
background: linear-gradient(135deg, var(--peach) 0%, var(--peach-dark) 100%) !important;
|
| 271 |
+
border: 0 !important;
|
| 272 |
+
border-radius: var(--radius) !important;
|
| 273 |
+
box-shadow: 0 8px 20px rgba(255, 173, 122, 0.28) !important;
|
| 274 |
+
color: #ffffff !important;
|
| 275 |
+
font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
|
| 276 |
+
font-weight: 800 !important;
|
| 277 |
+
padding: 0.72rem 1.1rem !important;
|
| 278 |
+
transition: transform 0.18s ease, box-shadow 0.18s ease !important;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
button.primary:hover,
|
| 282 |
+
button[class*="primary"]:hover,
|
| 283 |
+
div[data-testid="stButton"] > button:hover {
|
| 284 |
+
box-shadow: 0 12px 26px rgba(255, 173, 122, 0.36) !important;
|
| 285 |
+
transform: translateY(-1px) !important;
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
button.secondary,
|
| 289 |
+
button[class*="secondary"] {
|
| 290 |
+
background: #ffffff !important;
|
| 291 |
+
border: 1px solid var(--border-default) !important;
|
| 292 |
+
border-radius: var(--radius) !important;
|
| 293 |
+
color: var(--ink) !important;
|
| 294 |
+
font-weight: 700 !important;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
code,
|
| 298 |
+
pre {
|
| 299 |
+
border-radius: var(--radius) !important;
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
code {
|
| 303 |
+
background: #fff2e8 !important;
|
| 304 |
+
color: #9a4f1f !important;
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
pre {
|
| 308 |
+
background: #111827 !important;
|
| 309 |
+
border: 1px solid #273244 !important;
|
| 310 |
+
color: #f9fafb !important;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
table {
|
| 314 |
+
border-collapse: collapse;
|
| 315 |
+
width: 100%;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
th {
|
| 319 |
+
background: #fff2e8;
|
| 320 |
+
color: var(--ink);
|
| 321 |
+
font-weight: 800;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
td,
|
| 325 |
+
th {
|
| 326 |
+
border-bottom: 1px solid var(--border-default);
|
| 327 |
+
padding: 0.7rem;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
blockquote,
|
| 331 |
+
.markdown-text blockquote {
|
| 332 |
+
background: #faf9fc !important;
|
| 333 |
+
border-left: 3px solid var(--lavender) !important;
|
| 334 |
+
border-radius: 0 var(--radius) var(--radius) 0 !important;
|
| 335 |
+
color: var(--slate) !important;
|
| 336 |
+
margin: 0.5rem 0 !important;
|
| 337 |
+
padding: 0.75rem 1rem !important;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
a {
|
| 341 |
+
color: #2774a9 !important;
|
| 342 |
+
font-weight: 700;
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
.hf-footer {
|
| 346 |
+
border-top: 1px solid var(--border-default);
|
| 347 |
+
color: var(--muted);
|
| 348 |
+
font-size: 0.92rem;
|
| 349 |
+
margin-top: 1.5rem;
|
| 350 |
+
padding: 1rem 0;
|
| 351 |
+
text-align: center;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
.hf-footer strong {
|
| 355 |
+
color: var(--ink);
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
/* Streamlit shell polish. */
|
| 359 |
+
div[data-testid="stHeader"],
|
| 360 |
+
div[data-testid="stToolbar"] {
|
| 361 |
+
background: transparent !important;
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
div[data-testid="stSidebar"] {
|
| 365 |
+
background: rgba(255, 255, 255, 0.82) !important;
|
| 366 |
+
border-right: 1px solid var(--border-default);
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
div[data-baseweb="input"],
|
| 370 |
+
div[data-baseweb="textarea"],
|
| 371 |
+
div[data-baseweb="select"] {
|
| 372 |
+
background: #ffffff !important;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
div[data-testid="stTextInput"] input,
|
| 376 |
+
div[data-testid="stTextArea"] textarea,
|
| 377 |
+
div[data-testid="stSelectbox"] div {
|
| 378 |
+
border-radius: var(--radius) !important;
|
| 379 |
+
border-color: var(--border-default) !important;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
div[data-testid="stMetric"] {
|
| 383 |
+
background: rgba(255, 255, 255, 0.9);
|
| 384 |
+
border: 1px solid var(--border-default);
|
| 385 |
+
border-radius: var(--radius);
|
| 386 |
+
box-shadow: var(--shadow-md);
|
| 387 |
+
padding: 0.8rem 1rem;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
.stPlotlyChart {
|
| 391 |
+
background: rgba(255, 255, 255, 0.86);
|
| 392 |
+
border: 1px solid var(--border-default);
|
| 393 |
+
border-radius: var(--radius);
|
| 394 |
+
box-shadow: var(--shadow-sm);
|
| 395 |
+
padding: 0.3rem;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
@keyframes fadeIn {
|
| 399 |
+
from {
|
| 400 |
+
opacity: 0;
|
| 401 |
+
transform: translateY(8px);
|
| 402 |
+
}
|
| 403 |
+
to {
|
| 404 |
+
opacity: 1;
|
| 405 |
+
transform: translateY(0);
|
| 406 |
+
}
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
/* Entrance animation for the hero and card components. */
.hf-hero,
.hf-method-card,
.metric-card,
.info-card {
  animation: fadeIn 0.28s ease-out;
}

/* Honour the user's "reduce motion" setting: show the cards without animating. */
@media (prefers-reduced-motion: reduce) {
  .hf-hero,
  .hf-method-card,
  .metric-card,
  .info-card {
    animation: none;
  }
}
|
| 415 |
+
|
| 416 |
+
@media (max-width: 720px) {
|
| 417 |
+
.hf-hero-copy {
|
| 418 |
+
flex-direction: column;
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
.hf-icon {
|
| 422 |
+
height: 3.2rem;
|
| 423 |
+
width: 3.2rem;
|
| 424 |
+
}
|
| 425 |
+
}
|
shared/utils.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HF-Master Shared Utilities
|
| 3 |
+
Helper functions for all projects
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import json
|
| 9 |
+
import hashlib
|
| 10 |
+
from typing import Dict, List, Optional, Any, Union
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import sqlite3
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def load_env(var_name: str, default: Optional[str] = None) -> Optional[str]:
    """Return the value of the environment variable ``var_name``.

    ``default`` (``None`` unless supplied) is returned when the variable is
    not set.
    """
    return os.environ.get(var_name, default)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def load_api_key(provider: str = "openai") -> Optional[str]:
    """Look up the API key for ``provider`` in the environment.

    The provider name is lower-cased and matched against a fixed table of
    environment-variable names. Returns ``None`` when the provider is not in
    the table or when its variable is unset.
    """
    # Provider name -> environment variable holding its key.
    env_names = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "huggingface": "HF_TOKEN",
        "cohere": "COHERE_API_KEY",
        "together": "TOGETHER_API_KEY",
    }

    variable = env_names.get(provider.lower())
    if not variable:
        return None
    return load_env(variable)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def estimate_token_count(text: str, model: str = "gpt-4") -> int:
    """Give a rough token count for ``text``.

    The estimate is ``len(text)`` floor-divided by a characters-per-token
    ratio looked up by exact model name; models missing from the table use a
    ratio of 4.
    """
    # Approximate number of characters per token for each known model name.
    chars_per_token_by_model = {
        "gpt-4": 4,
        "gpt-3.5": 4,
        "claude": 4,
        "llama": 3,
    }

    try:
        ratio = chars_per_token_by_model[model]
    except KeyError:
        ratio = 4
    return len(text) // ratio
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def estimate_tokens(text: str, model: str = "gpt-4") -> int:
    """Legacy name for ``estimate_token_count``; kept so older apps keep working."""
    token_estimate = estimate_token_count(text, model)
    return token_estimate
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def calculate_api_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    provider: str = "openai"
) -> float:
    """Compute the cost of one model call from a built-in price table.

    Prices are applied per 1000 tokens, separately for input and output.
    A provider or model missing from the table is charged at the fallback
    rate of 0.01 (input) / 0.03 (output).
    """
    # provider -> model -> price per 1000 tokens.
    rate_table = {
        "openai": {
            "gpt-4": {"input": 0.03, "output": 0.06},
            "gpt-3.5-turbo": {"input": 0.001, "output": 0.002},
            "gpt-4-turbo": {"input": 0.01, "output": 0.03},
        },
        "anthropic": {
            "claude-3-opus": {"input": 0.015, "output": 0.075},
            "claude-3-sonnet": {"input": 0.003, "output": 0.015},
        },
    }
    fallback_rates = {"input": 0.01, "output": 0.03}

    rates = rate_table.get(provider, {}).get(model, fallback_rates)

    prompt_cost = (input_tokens / 1000) * rates["input"]
    completion_cost = (output_tokens / 1000) * rates["output"]
    return prompt_cost + completion_cost
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def calculate_cost(tokens: int, model: str = "gpt-4", provider: str = "openai") -> float:
    """Legacy name used by older apps.

    Prices ``tokens`` as input tokens only (zero output tokens) through
    ``calculate_api_cost``.
    """
    return calculate_api_cost(
        model=model,
        input_tokens=tokens,
        output_tokens=0,
        provider=provider,
    )
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def sanitize_filename(name: str) -> str:
    """Turn an arbitrary string into a filename-friendly slug.

    The text is lower-cased and stripped, every character that is not a word
    character, whitespace or hyphen is removed, and each run of whitespace is
    replaced by a single hyphen.
    """
    lowered = name.lower().strip()
    without_punctuation = re.sub(r'[^\w\s-]', '', lowered)
    return re.sub(r'[\s]+', '-', without_punctuation)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def create_hash(text: str, length: int = 8) -> str:
    """Return the first ``length`` hex characters of the MD5 digest of ``text``.

    Intended as a short identifier, not as a security measure.
    """
    full_digest = hashlib.md5(text.encode()).hexdigest()
    return full_digest[:length]
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def format_duration(seconds: float) -> str:
    """Render a duration given in seconds as a short string.

    Values below one minute are shown in seconds (``"12.3s"``), below one
    hour in minutes (``"1.5m"``), and anything else in hours (``"2.0h"``),
    always with one decimal place.
    """
    # (exclusive upper bound in seconds, divisor, unit suffix)
    small_units = ((60, 1, "s"), (3600, 60, "m"))
    for upper_bound, divisor, unit in small_units:
        if seconds < upper_bound:
            return f"{seconds / divisor:.1f}{unit}"
    return f"{seconds / 3600:.1f}h"
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def format_bytes(bytes: int) -> str:
    """Render a byte count with a binary (1024-based) unit and one decimal.

    Units run from ``B`` to ``TB``; anything that is still 1024 or more
    after the ``TB`` step is reported in ``PB``.
    """
    # The parameter name shadows the builtin ``bytes``; it is kept because
    # callers may pass it by keyword.
    unit_names = ('B', 'KB', 'MB', 'GB', 'TB')
    amount = bytes
    index = 0
    while index < len(unit_names):
        if amount < 1024:
            return f"{amount:.1f} {unit_names[index]}"
        amount /= 1024
        index += 1
    return f"{amount:.1f} PB"
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
    """Shorten ``text`` to at most ``max_length`` characters.

    Text that already fits is returned unchanged. Otherwise the text is cut
    and ``suffix`` appended so that the total length equals ``max_length``.

    When ``max_length`` is too small to hold the suffix plus any text, the
    suffix itself is clipped to ``max_length`` characters (an empty string
    for ``max_length <= 0``), so the result never exceeds ``max_length``.
    """
    if len(text) <= max_length:
        return text

    keep = max_length - len(suffix)
    if keep <= 0:
        # A negative slice bound would count from the end of ``text`` and
        # return far more than max_length characters, so clip the suffix.
        return suffix[:max(max_length, 0)]
    return text[:keep] + suffix
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def parse_dice_notation(notation: str) -> Dict[str, Any]:
    """Parse dice notation such as ``2d6+3`` or ``4d6kh3``.

    Matching is case-insensitive and ignores whitespace, so ``"2D6 + 3"`` is
    accepted. The whole string must be valid notation.

    Returns a dict with:
        ``num_dice``: number of dice to roll.
        ``die_size``: number of sides per die.
        ``keep_high``: the lower-cased ``"kh<N>"`` fragment, or ``None``.
        ``modifier``: signed integer modifier, ``0`` when absent.

    Raises:
        ValueError: if ``notation`` is not valid dice notation.
    """
    # Normalise to lower case: the pattern below is written with lowercase
    # ``d`` and ``kh``, so upper-casing the input could never match.
    normalized = re.sub(r'\s+', '', notation).lower()
    match = re.fullmatch(r'(\d+)d(\d+)(kh\d+)?([+-]\d+)?', normalized)
    if not match:
        raise ValueError(f"Invalid dice notation: {notation}")

    count_text, size_text, keep_high, modifier_text = match.groups()

    return {
        "num_dice": int(count_text),
        "die_size": int(size_text),
        "keep_high": keep_high,
        "modifier": int(modifier_text) if modifier_text else 0
    }
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def roll_dice(notation: str) -> List[int]:
    """Roll the dice described by ``notation`` and return the individual results.

    The notation is parsed with ``parse_dice_notation``. When it carries a
    keep-highest part (``kh<N>``), only the ``N`` highest rolls are returned,
    sorted in descending order. The modifier is not applied here.
    """
    import random

    spec = parse_dice_notation(notation)

    results = []
    for _ in range(spec["num_dice"]):
        results.append(random.randint(1, spec["die_size"]))

    keep_spec = spec["keep_high"]
    if not keep_spec:
        return results

    # keep_spec looks like "kh3": drop the two-letter prefix to get the count.
    keep_count = int(keep_spec[2:])
    results.sort(reverse=True)
    return results[:keep_count]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def calculate_modifier(ability_score: int) -> int:
    """Convert a D&D ability score into its modifier.

    The modifier is half the distance from 10, rounded down (so 8 and 9
    both give -1, 10 and 11 give 0).
    """
    distance_from_ten = ability_score - 10
    return distance_from_ten // 2
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def validate_ethereum_address(address: str) -> bool:
    """Check that ``address`` has the Ethereum address format.

    The format is ``0x`` followed by exactly 40 hexadecimal characters.
    Only the shape is checked; the mixed-case checksum is not verified.
    """
    # fullmatch rather than a ``$`` anchor: ``$`` also matches just before a
    # trailing newline, which would let "0x...\n" through.
    return re.fullmatch(r'0x[a-fA-F0-9]{40}', address) is not None
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def validate_solana_address(address: str) -> bool:
    """Check that ``address`` has the Solana address format.

    The format is 32 to 44 characters drawn from the Base58 alphabet
    (digits and letters without ``0``, ``O``, ``I`` and ``l``). Only the
    shape is checked; the string is not decoded.
    """
    # fullmatch rather than a ``$`` anchor: ``$`` also matches just before a
    # trailing newline, which would accept an address followed by "\n".
    return re.fullmatch(r'[1-9A-HJ-NP-Za-km-z]{32,44}', address) is not None
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def extract_urls(text: str) -> List[str]:
    """Return every ``http://`` or ``https://`` URL found in ``text``, in order.

    A URL runs until the first whitespace or one of the characters
    ``< > " { } | \\ ^ ` [ ]``; trailing punctuation such as ``.`` is kept.
    """
    url_regex = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
    return [found.group(0) for found in url_regex.finditer(text)]
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def extract_code_blocks(text: str) -> List[str]:
    """Return the contents of every fenced code block in markdown ``text``.

    A block opens with three backticks, an optional language tag and a
    newline, and ends at the next three backticks. Only the text between the
    fences is returned; the fences and the language tag are dropped.
    """
    fence_regex = re.compile(r'```(?:\w+)?\n(.*?)```', re.DOTALL)
    return [block.group(1) for block in fence_regex.finditer(text)]
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def parse_math_expression(expr: str) -> float:
    """Evaluate a simple arithmetic expression given as a string.

    Only digits, ``+ - * / . ( )`` and spaces are accepted.

    Raises:
        ValueError: if ``expr`` contains any other character.
    """
    permitted = set("0123456789+-*/.() ")
    if any(ch not in permitted for ch in expr):
        raise ValueError(f"Unsafe expression: {expr}")

    # NOTE(review): this still hands the string to eval(). The character
    # whitelist blocks names and attribute access, but inputs such as
    # "9**9**9**9" can consume unbounded CPU and memory. Do not expose this
    # to untrusted input without a size limit or an ast-based evaluator.
    return eval(expr)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def create_timer(func):
    """Decorator that prints how long each call to ``func`` took.

    The elapsed wall-clock time is formatted with ``format_duration`` and
    printed after the call returns; the wrapped function's return value is
    passed through unchanged.
    """
    import time
    from functools import wraps

    @wraps(func)
    def timed(*args, **kwargs):
        began = time.time()
        outcome = func(*args, **kwargs)
        elapsed = time.time() - began
        print(f"{func.__name__} took {format_duration(elapsed)}")
        return outcome

    return timed
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def retry_on_failure(max_attempts: int = 3, delay: float = 1.0):
    """Build a decorator that retries the wrapped function when it raises.

    The function is called up to ``max_attempts`` times. After the k-th
    failed attempt the wrapper sleeps ``delay * k`` seconds before trying
    again; the exception from the final attempt is re-raised. If
    ``max_attempts`` is zero or negative the function is never called and
    the wrapper returns ``None``.
    """
    from functools import wraps
    import time

    def decorator(func):
        @wraps(func)
        def retrying(*args, **kwargs):
            attempts_made = 0
            while attempts_made < max_attempts:
                attempts_made += 1
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempts_made == max_attempts:
                        raise
                    # Linear back-off: wait longer after each failure.
                    time.sleep(delay * attempts_made)

        return retrying

    return decorator
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
class SimpleCache:
    """Small in-memory key/value cache with least-recently-used eviction.

    Each ``get`` hit and each ``set`` stamps the key with the current time.
    When a new key is added to a full cache, the key with the oldest stamp
    is dropped first.
    """

    def __init__(self, max_size: int = 100):
        """Create an empty cache holding at most ``max_size`` entries."""
        # key -> stored value
        self.cache: Dict[str, Any] = {}
        self.max_size = max_size
        # key -> time of the most recent get/set for that key
        self.access_times: Dict[str, datetime] = {}

    def get(self, key: str) -> Optional[Any]:
        """Return the value stored under ``key``, or ``None`` if absent.

        A hit refreshes the key's access time. A stored ``None`` cannot be
        told apart from a miss.
        """
        if key in self.cache:
            self.access_times[key] = datetime.now()
            return self.cache[key]
        return None

    def set(self, key: str, value: Any):
        """Store ``value`` under ``key``, evicting the oldest entry if needed.

        Overwriting an existing key never evicts anything, because the
        number of entries does not grow.
        """
        is_new_key = key not in self.cache
        # The ``self.cache`` check keeps min() away from an empty dict when
        # max_size is zero or negative.
        if is_new_key and self.cache and len(self.cache) >= self.max_size:
            oldest = min(self.access_times, key=self.access_times.get)
            del self.cache[oldest]
            del self.access_times[oldest]

        self.cache[key] = value
        self.access_times[key] = datetime.now()

    def clear(self):
        """Remove every entry and its access time."""
        self.cache.clear()
        self.access_times.clear()
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
class Database:
    """Thin convenience wrapper around a single ``sqlite3`` connection.

    The connection is opened lazily on the first query and rows are returned
    as plain dicts keyed by column name.
    """

    def __init__(self, db_path: str = "data.db"):
        """Remember ``db_path`` and make sure its parent directory exists."""
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        # Open connection, or None while disconnected.
        self.conn = None

    def connect(self):
        """Open the connection and make rows addressable by column name."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row

    def close(self):
        """Close the connection, if one is open."""
        if self.conn:
            self.conn.close()
            # Forget the closed handle so the next execute() reconnects
            # instead of failing on a closed connection.
            self.conn = None

    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
        """Run ``query`` with bound ``params``, connecting first if needed."""
        if not self.conn:
            self.connect()
        return self.conn.execute(query, params)

    def commit(self):
        """Commit the current transaction, if connected."""
        if self.conn:
            self.conn.commit()

    def fetchall(self, query: str, params: tuple = ()) -> List[Dict]:
        """Run ``query`` and return every row as a dict."""
        cursor = self.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def fetchone(self, query: str, params: tuple = ()) -> Optional[Dict]:
        """Run ``query`` and return the first row as a dict, or ``None``."""
        cursor = self.execute(query, params)
        row = cursor.fetchone()
        return dict(row) if row else None

    def create_table(self, name: str, columns: Dict[str, str]):
        """Create table ``name`` if it does not exist, then commit.

        ``columns`` maps column names to their SQL type/constraint text.
        """
        # NOTE(review): the table name and column definitions are spliced
        # into the SQL text (identifiers cannot be bound as parameters), so
        # they must come from trusted code, never from user input.
        cols = ", ".join([f"{k} {v}" for k, v in columns.items()])
        self.execute(f"CREATE TABLE IF NOT EXISTS {name} ({cols})")
        self.commit()
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def load_json_file(filepath: str) -> Dict:
    """Read ``filepath`` and return the parsed JSON content.

    The file is decoded as UTF-8, the standard JSON encoding, instead of
    whatever the platform's locale encoding happens to be.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def save_json_file(data: Dict, filepath: str):
    """Write ``data`` to ``filepath`` as JSON indented by two spaces.

    Missing parent directories are created first; an existing file is
    overwritten.
    """
    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w') as handle:
        json.dump(data, handle, indent=2)
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def merge_dicts(*dicts: Dict) -> Dict:
    """Combine the given dictionaries into one new dictionary.

    Dictionaries are applied left to right, so for a key present in several
    of them the value from the last one wins. The inputs are not modified.
    """
    return {
        key: value
        for mapping in dicts
        for key, value in mapping.items()
    }
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def flatten_list(nested: List[Any]) -> List[Any]:
    """Return a flat list of the non-list items in ``nested``, depth first.

    Only ``list`` instances are descended into; tuples and other containers
    are kept as single items.
    """
    flat: List[Any] = []
    for element in nested:
        flat += flatten_list(element) if isinstance(element, list) else [element]
    return flat
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def chunk_text(text: str, chunk_size: int, overlap: int = 0) -> List[str]:
    """Split ``text`` into chunks of up to ``chunk_size`` characters.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.
    The final chunk may be shorter. An empty ``text`` gives an empty list.

    Raises:
        ValueError: if ``chunk_size`` is not positive, or if ``overlap`` is
            not smaller than ``chunk_size``. In both cases the start position
            would never advance and the split would loop forever.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def get_project_root() -> Path:
    """Return the directory two levels above this file.

    That is the parent of the directory containing this module.
    """
    module_dir = Path(__file__).parent
    return module_dir.parent
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
def ensure_dir(path: str):
    """Create directory ``path`` and any missing parents.

    Does nothing when the directory already exists.
    """
    directory = Path(path)
    directory.mkdir(parents=True, exist_ok=True)
|