# thekusaldarshana's picture
# Update app.py
# 8f94d61 verified
# app.py
import gradio as gr
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi
import fasttext
import os
import numpy as np
from functools import lru_cache
import json
import time
from typing import List, Tuple, Optional, Dict, Any
from collections import defaultdict, deque
import hashlib
import uuid
import tempfile
import requests
import webbrowser
# -------------------------
# Styles
# -------------------------
# Custom CSS for the Gradio UI; it is handed to gr.Blocks through its `css`
# argument. The `#button` and `#button_green` selectors correspond to the
# `elem_id` values given to the gr.Button components in this file.
# NOTE(review): `#button_green` declares `color` twice; under the CSS cascade
# the later `#1d1d1d` declaration is the one that takes effect.
styles = """
body{
background : #161616;
}
#button {
background: linear-gradient(to right, #6A359C, #B589D6);
color: #efefef;
font-weight: 600;
border: none;
border-radius: 8px;
margin : 8px auto;
transition: all 0.3s ease;
}
#button_green {
background: linear-gradient(to right, #18de78, #50eb9b);
color: #efefef;
font-weight: 600;
border: none;
width: 50%;
color : #1d1d1d;
margin : 8px auto;
border-radius: 8px;
transition: all 0.3s ease;
}
#button:hover {
background: linear-gradient(to right, #5A2D8C, #A579C6);
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(106, 53, 156, 0.3);
}
a{
color : #1baaf2;
text-decoration: none;
}
.normal-text{
font-size: 25px;
}
"""
# -------------------------
# Website References
# -------------------------
# Landing page opened by the "Visit Remeinium AI" buttons.
website = 'https://ai.remeinium.com'
# Root of the API documentation site.
docs = 'https://esdocs.ai.remeinium.com'
# Deep links into the API reference for the JavaScript and cURL clients.
js_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#javascript'
cu_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#curl'
# Public uptime page opened by the "Check Status" button.
status = 'https://stats.uptimerobot.com/HZFBOsSvBT'
# Model card on the Hugging Face Hub (the Hub lives on huggingface.co).
# NOTE: the name `model` is rebound to the loaded FastText model in the
# "Model Loading" section, so this URL is only reachable before that point.
model = 'https://huggingface.co/Remeinium/UgannA_SiyabasaV2'
# -------------------------
# Model Loading
# -------------------------
# Hub access token, read from the environment; startup aborts without it.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise EnvironmentError("HF_TOKEN is not set. Please add it in Space Settings > Secrets.")

try:
    print("Downloading UgannA_SiyabasaV2 model...")
    # hf_hub_download's return value is passed straight to fasttext as the
    # path of the model file.
    model_path = hf_hub_download(
        repo_id="Remeinium/UgannA_SiyabasaV2",
        filename="UgannA_SiyabasaV2.bin",
        token=HF_TOKEN,
        repo_type="model",
    )
    model = fasttext.load_model(model_path)
    print("Model loaded successfully!")
    # Static description of the loaded model; dimension and vocabulary size
    # are queried from the model itself.
    MODEL_INFO = {
        "name": "UgannA_SiyabasaV2",
        "version": "2.0",
        "dimensions": model.get_dimension(),
        "vocabulary_size": len(model.get_words()),
        "language": "Sinhala",
        "architecture": "FastText",
    }
except Exception as e:
    # Chain explicitly so the original download/load failure stays attached
    # as __cause__ of the startup error.
    raise RuntimeError(f"Failed to load model: {e}") from e
# -------------------------
# Rate Limiting
# -------------------------
class RateLimiter:
    """In-memory request limiter with per-minute, per-hour and per-day caps.

    ``requests`` maps an identifier to a deque of ``time.time()`` stamps of
    its accepted requests, oldest first.
    """

    def __init__(self):
        self.requests = defaultdict(deque)
        self.user_limits = defaultdict(deque)
        # limits
        self.limits = {"per_minute": 120, "per_hour": 2000, "per_day": 100000}

    def check_limit(self, client_id: str, user_id: str = None) -> Tuple[bool, Dict[str, Any]]:
        """Decide whether one more request is allowed and record it if so.

        Args:
            client_id: Fallback identifier for the caller.
            user_id: Preferred identifier; used instead of ``client_id``
                whenever it is truthy.

        Returns:
            ``(allowed, info)``. On refusal ``info`` holds the limit that was
            hit, the current count in that window and ``reset_in`` seconds.
            On success it holds the configured limits and the daily usage.
        """
        now = time.time()
        history = self.requests[user_id if user_id else client_id]

        # Drop stamps that fell out of the 24 hour window.
        day_cutoff = now - 86400
        while history and history[0] < day_cutoff:
            history.popleft()
        daily_total = len(history)

        def refuse(limit_name, window, hits):
            # `reset_in` counts down from the oldest request in the window.
            return False, {
                "allowed": False,
                "limit": self.limits[limit_name],
                "current": len(hits),
                "reset_in": window - (now - hits[0]) if hits else window,
            }

        # Daily cap is checked against the whole (already pruned) history.
        if daily_total >= self.limits["per_day"]:
            return refuse("per_day", 86400, history)

        # Hourly cap first, then the minute cap.
        for limit_name, window in (("per_hour", 3600), ("per_minute", 60)):
            hits = [stamp for stamp in history if stamp > now - window]
            if len(hits) >= self.limits[limit_name]:
                return refuse(limit_name, window, hits)

        # Accepted: remember this request.
        history.append(now)
        return True, {
            "allowed": True,
            "limits": self.limits,
            "current_daily": daily_total + 1,
            "remaining_daily": self.limits["per_day"] - daily_total - 1,
        }


rate_limiter = RateLimiter()
# -------------------------
# Core Embedding Functions
# -------------------------
def enhanced_embedding_response(original_result, text, endpoint_type="word"):
    """Attach model metadata to a result dict and shape it per endpoint.

    Args:
        original_result: Raw result dict. A dict containing an ``"error"``
            key is returned untouched. Otherwise it is updated in place with
            ``"model"``, ``"language"`` and ``"dimensions"``.
        text: The input text echoed back as ``"text"`` (word endpoint) or
            ``"sentence"`` (sentence endpoint).
        endpoint_type: ``"word"`` or ``"sentence"`` select a fixed response
            layout; any other value returns the updated ``original_result``.

    Returns:
        The response dict for the endpoint.
    """
    if "error" in original_result:
        return original_result

    embedding = original_result.get("embedding", [])
    # Report the real vector size: prefer what the caller measured, then the
    # embedding's own length, and only fall back to the nominal 300 when the
    # result carries no vector (similarity / neighbour responses).
    dimensions = original_result.get("dimensions") or len(embedding) or 300

    # Common metadata
    original_result["model"] = "UgannA_SiyabasaV2"
    original_result["language"] = "Sinhala"
    original_result["dimensions"] = dimensions

    # Format based on endpoint type
    if endpoint_type == "word":
        return {
            "text": text,
            "embedding": embedding,
            "dimensions": dimensions,
            "model": original_result["model"],
            "language": original_result["language"],
        }
    if endpoint_type == "sentence":
        return {
            "sentence": text,
            "embedding": embedding,
            "dimensions": dimensions,
            "tokens": original_result.get("tokens", []),
            "token_count": original_result.get("token_count", 0),
            "model": original_result["model"],
            "language": original_result["language"],
        }
    # For similarity and neighbors
    return original_result
def safe_strip(s: Optional[str]) -> str:
    """Return ``s`` without surrounding whitespace; ``None`` becomes ``""``."""
    if s is None:
        return ""
    return s.strip()
@lru_cache(maxsize=1)
def load_vocab_and_matrix(max_words: int = 500000):
    """Build the vocabulary list and its embedding matrices.

    The result is memoised by ``lru_cache(maxsize=1)``, so only the most
    recently requested ``max_words`` value stays cached.

    Args:
        max_words: Upper bound on how many entries of ``model.get_words()``
            are used.

    Returns:
        ``(words, mat, mat_norm)``: the word list, the float32 matrix with one
        word vector per row, and the same matrix with each row scaled to unit
        length.

    Raises:
        RuntimeError: If anything fails while building the matrices; the
            original exception is attached as the cause.
    """
    try:
        words = model.get_words()[:max_words]
        mat = np.vstack([model.get_word_vector(w) for w in words]).astype(np.float32)
        norms = np.linalg.norm(mat, axis=1, keepdims=True)
        # Leave all-zero rows as zeros instead of dividing by zero.
        norms[norms == 0.0] = 1.0
        mat_norm = mat / norms
        return words, mat, mat_norm
    except Exception as exc:
        raise RuntimeError("Failed to load vocabulary matrix") from exc
def cosine_similarity_vec(u: np.ndarray, mat_norm: np.ndarray) -> np.ndarray:
    """Return the dot product of every row of ``mat_norm`` with unit-scaled ``u``.

    A zero-length ``u`` yields a float32 zero vector with one entry per row
    of ``mat_norm``.
    """
    magnitude = np.linalg.norm(u)
    if magnitude != 0:
        unit = (u / magnitude).astype(np.float32)
        return np.dot(mat_norm, unit)
    return np.zeros(mat_norm.shape[0], dtype=np.float32)
def get_embedding(word: str) -> Dict[str, Any]:
    """Look up the vector for one word and return it in the word-endpoint layout.

    Blank input and any exception are reported as ``{"error": ...}``.
    """
    cleaned = safe_strip(word)
    if not cleaned:
        return {"error": "Please provide a Sinhala word"}
    try:
        vector = model.get_word_vector(cleaned)
        payload = {
            "word": cleaned,
            "embedding": vector.tolist(),
            "dimensions": len(vector),
        }
        return enhanced_embedding_response(payload, cleaned, "word")
    except Exception as e:
        return {"error": f"Failed to generate embedding: {str(e)}"}
def word_similarity(word1: str, word2: str) -> Dict[str, Any]:
    """Compute the cosine similarity of two words, rounded to 6 decimals.

    Returns ``{"error": ...}`` when either word is blank or the computation
    raises. A zero-norm vector gives a similarity of ``0.0``.
    """
    first = safe_strip(word1)
    second = safe_strip(word2)
    if not (first and second):
        return {"error": "Both words are required"}
    try:
        vec_a = model.get_word_vector(first)
        vec_b = model.get_word_vector(second)
        scale = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        if scale != 0:
            score = float(np.dot(vec_a, vec_b) / scale)
        else:
            score = 0.0
        payload = {
            "word1": first,
            "word2": second,
            "similarity": round(score, 6),
        }
        return enhanced_embedding_response(payload, f"{first} vs {second}", "similarity")
    except Exception as e:
        return {"error": f"Similarity computation failed: {str(e)}"}
def nearest_neighbors(word: str, top_k: int = 10) -> Dict[str, Any]:
    """Return the vocabulary words most similar to ``word``.

    Args:
        word: Query word; surrounding whitespace is ignored.
        top_k: Maximum number of neighbours. Any value ``int()`` accepts is
            allowed, so a float coming from a UI slider works too.

    Returns:
        ``{"query": ..., "neighbors": [{"word", "similarity"}, ...]}`` plus
        model metadata, or ``{"error": ...}`` for blank input or on failure.
    """
    word = safe_strip(word)
    if not word:
        return {"error": "Word input required"}
    try:
        # Slice bounds must be integers; a fractional slider value would
        # otherwise raise TypeError in the slice below.
        top_k = int(top_k)
        words, _mat, mat_norm = load_vocab_and_matrix()
        vec = model.get_word_vector(word)
        sims = cosine_similarity_vec(vec, mat_norm)
        # One extra candidate so the query itself can be skipped.
        indices = np.argsort(-sims)[:top_k + 1]
        results = []
        for i in indices:
            neighbor = words[i]
            if neighbor != word:
                results.append({"word": neighbor, "similarity": round(float(sims[i]), 6)})
            if len(results) >= top_k:
                break
        base_result = {
            "query": word,
            "neighbors": results,
        }
        return enhanced_embedding_response(base_result, word, "neighbors")
    except Exception as e:
        return {"error": f"Neighbor search failed: {str(e)}"}
def sentence_embedding(sentence: str) -> Dict[str, Any]:
    """Embed a sentence as the mean of its whitespace-separated token vectors.

    Returns the sentence-endpoint layout, or ``{"error": ...}`` when the
    input is blank, has no tokens, or the computation raises.
    """
    text = safe_strip(sentence)
    if not text:
        return {"error": "Sentence input required"}
    try:
        # str.split() with no separator never yields empty or blank pieces.
        tokens = text.split()
        if not tokens:
            return {"error": "No valid tokens found"}
        token_vectors = [model.get_word_vector(tok) for tok in tokens]
        mean_vector = np.mean(token_vectors, axis=0)
        payload = {
            "sentence": text,
            "embedding": mean_vector.tolist(),
            "tokens": tokens,
            "token_count": len(tokens),
        }
        return enhanced_embedding_response(payload, text, "sentence")
    except Exception as e:
        return {"error": f"Sentence embedding failed: {str(e)}"}
def sentence_similarity(sentence1: str, sentence2: str) -> Dict[str, Any]:
    """Compute the cosine similarity of two sentence embeddings (6 decimals).

    Both sentences are embedded first; if either embedding failed, the first
    error found (sentence 1 before sentence 2) is returned. The sentences are
    echoed back exactly as passed in.
    """
    try:
        first = sentence_embedding(sentence1)
        second = sentence_embedding(sentence2)
        for outcome in (first, second):
            if "error" in outcome:
                return {"error": outcome["error"]}
        vec_a = np.array(first["embedding"])
        vec_b = np.array(second["embedding"])
        scale = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        if scale != 0:
            score = float(np.dot(vec_a, vec_b) / scale)
        else:
            score = 0.0
        payload = {
            "sentence1": sentence1,
            "sentence2": sentence2,
            "similarity": round(score, 6),
        }
        return enhanced_embedding_response(payload, f"{sentence1} vs {sentence2}", "sentence_similarity")
    except Exception as e:
        return {"error": f"Sentence similarity failed: {str(e)}"}
# -------------------------
# Document Search
# -------------------------
def parse_uploaded_documents(file):
    """Read an uploaded text file and split it into one document per line.

    Args:
        file: ``None``, an object with a ``name`` attribute holding the file
            path, or anything whose ``str()`` is the file path.

    Returns:
        ``{"documents": [...]}`` with every non-blank line stripped of
        surrounding whitespace, or ``{"error": ...}`` when no file was given,
        the file cannot be read, or it contains no non-blank line.
    """
    if file is None:
        return {"error": "Please upload a file (txt/csv)."}
    try:
        file_path = file.name if hasattr(file, "name") else str(file)
        # utf-8-sig reads plain UTF-8 as well and drops a leading BOM, which
        # would otherwise end up glued to the first document.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            raw = f.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: latin-1 can decode any byte sequence.
        try:
            with open(file_path, "r", encoding="latin-1") as f:
                raw = f.read()
        except Exception as e:
            return {"error": f"Encoding error: {str(e)}"}
    except Exception as e:
        return {"error": f"File reading error: {str(e)}"}

    # Every non-blank line is one document, whatever the file type.
    documents = [line.strip() for line in raw.splitlines() if line.strip()]
    if not documents:
        return {"error": "No documents found in the file"}
    return {"documents": documents}
def index_documents_for_search(docs: List[str]):
    """Turn documents into a matrix of mean token vectors for searching.

    Returns ``{"matrix", "matrix_norm", "docs"}`` where ``matrix`` has one
    float32 row per document and ``matrix_norm`` holds the rows scaled to
    unit length. A document without tokens gets an all-zero row. An empty
    input or any exception is reported as ``{"error": ...}``.
    """
    if not docs:
        return {"error": "The file was empty"}
    try:
        doc_vectors = []
        for text in docs:
            # str.split() with no separator never yields blank pieces.
            words = text.split()
            if words:
                stacked = np.vstack([model.get_word_vector(w) for w in words])
                doc_vectors.append(stacked.mean(axis=0))
            else:
                doc_vectors.append(np.zeros((model.get_dimension(),), dtype=np.float32))
        matrix = np.vstack(doc_vectors).astype(np.float32)
        lengths = np.linalg.norm(matrix, axis=1, keepdims=True)
        # Keep zero rows as zeros instead of dividing by zero.
        lengths[lengths == 0] = 1.0
        return {"matrix": matrix, "matrix_norm": matrix / lengths, "docs": docs}
    except Exception as e:
        return {"error": f"Error while data indexing: {str(e)}"}
def search_documents(query: str, indexed, top_n: int = 20):
    """Rank indexed documents by cosine similarity to ``query``.

    Args:
        query: Search text; it is embedded as the mean of its token vectors.
        indexed: Mapping with a ``"matrix_norm"`` array (one unit-length row
            per document) and the matching ``"docs"`` list.
        top_n: Maximum number of results. The default of 20 equals the upper
            end of the UI's "Number of Results" slider, so callers that trim
            the list afterwards can actually obtain that many.

    Returns:
        ``{"query": ..., "results": [{"document", "score"}, ...]}`` ordered
        best first, or ``{"error": ...}`` for a blank query or on failure.
    """
    q = safe_strip(query)
    if not q:
        return {"error": "Enter a query to search"}
    try:
        q_tokens = q.split()
        if not q_tokens:
            return {"error": "Couldn't extract tokens from query"}
        q_avg = np.vstack([model.get_word_vector(t) for t in q_tokens]).mean(axis=0)
        q_norm = np.linalg.norm(q_avg)
        if q_norm == 0:
            sims = np.zeros(indexed["matrix_norm"].shape[0], dtype=np.float32)
        else:
            q_avg = (q_avg / q_norm).astype(np.float32)
            sims = np.dot(indexed["matrix_norm"], q_avg)
        limit = max(int(top_n), 0)
        idx = np.argsort(-sims)[:limit]
        # Convert to a Python float before rounding: rounding a float32 and
        # widening it afterwards brings the binary noise digits back.
        results = [
            {"document": indexed["docs"][i], "score": round(float(sims[i]), 6)}
            for i in idx
        ]
        return {"query": q, "results": results}
    except Exception as e:
        return {"error": f"Search failed: {str(e)}"}
# -------------------------
# API Platform
# -------------------------
def create_api_platform():
    """Build the content of the "API Platform" tab.

    Creates quick-start snippets per client language, the per-endpoint
    reference, the model details table, usage limits and support links.
    It only instantiates Gradio components, so it must be called while a
    ``gr.Blocks`` context is open. Returns ``None``.
    """
    with gr.Column():
        # Quick Start Section
        gr.Markdown("## Quick start")
        gr.Markdown("Get started with the `Embedding_Siyabasa API` in minutes.")
        with gr.Tabs():
            with gr.TabItem("🐍 Python"):
                gr.Markdown("""
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
word="අම්මා",
api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```
""")
                # `\\*` keeps the literal backslash-asterisk in the rendered
                # Markdown without relying on an invalid `\*` string escape.
                gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _<u>\\*Required</u>_
- The input value that is provided in the "Sinhala Word" Textbox component.
#### **Returns 1 element**
- `str | float | bool | list | dict`
- The output value that appears in the "Embedding Vector" Json component.
""")
                # API Endpoints Section
                gr.Markdown("## API endpoints")
                # Word Embedding Endpoint
                with gr.Accordion("GET WORD EMBEDDING", open=True):
                    gr.Markdown("""
Get the embedding vector for a Sinhala word.
**Python example:**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
word="අම්මා",
api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```
**Response format:**
```json
{
"text": "අම්මා",
"embedding": [0.123, -0.456, 0.789, ...],
"dimensions": 300,
"model": "UgannA_SiyabasaV2",
"language": "Sinhala"
}
```
""")
                    gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _<u>\\*Required</u>_
- The input value that is provided in the "Sinhala Word" Textbox component.
#### **Returns 1 element**
- `str | float | bool | list | dict`
- The output value that appears in the "Embedding Vector" Json component.
""")
                # Word Similarity Endpoint
                with gr.Accordion("GET WORD SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala words.
**Python example:**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
word1="අම්මා",
word2="තාත්තා",
api_name="/word_similarity"
)
print(json.dumps(result, indent=4))
```
**Response format:**
```json
{
"word1": "අම්මා",
"word2": "තාත්තා",
"similarity": 0.856234,
"model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word1` :`string` \\*_<u>Required</u>_
- The input value that is provided in the "Word A" Textbox component.
2. `word2` : `string` \\*_<u>Required</u>_
- The input value that is provided in the "Word B" Textbox component.
#### **Returns 1 element**
`str | float | bool | list | dict`
- The output value that appears in the "Similarity Result" Json component.
""")
                # Nearest Neighbors Endpoint
                with gr.Accordion("GET NEAREST NEIGHBORS", open=False):
                    gr.Markdown("""
Find semantically similar words for a given Sinhala word.
**Python example:**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
word="පෞරාණික",
top_k=5,
api_name="/nearest_neighbors"
)
print(json.dumps(result, indent=4))
```
**Response format:**
```json
{
"query": "පෞරාණික",
"neighbors": [
{"word": "ඉපැරණි", "similarity": 0.755...},
{"word": "පුරාවිද්යාත්මක", "similarity": 0.749...},
...
],
"model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word` : `str` \\*_<u>Required</u>_
- The input value that is provided in the "Query Word" Textbox component.
2. `top_k` : `float` _Default: 10_
- The input value that is provided in the "Number of Results" Slider component.
#### **Returns 1 element**
`str | float | bool | list | dict`
- The output value that appears in the "Similar Words" Json component.""")
                # Sentence Embedding Endpoint
                with gr.Accordion("GET SENTENCE EMBEDDING", open=False):
                    gr.Markdown("""
Get the embedding vector for a Sinhala sentence.
**Python example:**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
sentence="මම පාසලට යමි",
api_name="/sentence_embedding"
)
print(json.dumps(result, indent=4))
```
**Response format:**
```json
{
"sentence": "මම පාසලට යමි",
"embedding": [0.123, -0.456, 0.789, ...],
"dimensions": 300,
"tokens": ["මම", "පාසලට", "යමි"],
"model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### Accepts 1 parameter:
- `sentence` : `str` \\*_<u>Required</u>_
- The input value that is provided in the "Sinhala Sentence" Textbox component.
#### **Returns 1 element**
`str | float | bool | list | dict`
- The output value that appears in the "Sentence Embedding" Json component.
""")
                # Sentence Similarity Endpoint
                with gr.Accordion("GET SENTENCE SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala sentences.
**Python example:**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
sentence1="මම පාසලට යමි",
sentence2="ඔහු පාසලට යයි",
api_name="/sentence_similarity"
)
print(json.dumps(result, indent=4))
```
**Response format:**
```json
{
"sentence1": "මම පාසලට යමි",
"sentence2": "ඔහු පාසලට යයි",
"similarity": 0.734567,
"model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
**Accepts 2 parameters:**
1. `sentence1` : `str` \\*_<u>Required</u>_
- The input value that is provided in the "Sentence A" Textbox component.
2. `sentence2` : `str` \\*_<u>Required</u>_
- The input value that is provided in the "Sentence B" Textbox component.
#### **Returns 1 element**
`str | float | bool | list | dict`
- The output value that appears in the "Sentence Similarity" Json component.
""")
                # Document Search Endpoints
                with gr.Accordion("DOCUMENT SEARCH", open=False):
                    gr.Markdown("""
Upload documents and perform semantic search.
**Step 1: Index documents**
```python
import json
from gradio_client import Client, handle_file
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
file=handle_file('path/to/documents.txt'),
api_name="/_index_upload"
)
print(json.dumps(result, indent=4))
``` """)
                    gr.Markdown("""
#### **Accepts 1 parameter:**
1. `file` : `filepath` \\*_<u>Required</u>_
The input value that is provided in the "Upload .txt or .csv File" File component. The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded. Attributes: path: The server file path where the file is stored. url: The normalized server URL pointing to the file. size: The size of the file in bytes. orig_name: The original filename before upload. mime_type: The MIME type of the file. is_stream: Indicates whether the file is a stream. meta: Additional metadata used internally (should not be changed).
#### **Returns tuple of 2 elements**
1. `dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)`
- The output value that appears in the `value_45` Dataframe component.
2. `str`
- The output value that appears in the "Status" Textbox component.
""")
                    gr.Markdown("""
**Step 2: Search documents**
```python
import json
from gradio_client import Client
client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
query="සිංහල භාෂාව",
topn_=5,
api_name="/_search_wrapper"
)
print(json.dumps(result, indent=4))
```
""")
                    gr.Markdown("""
### **Accepts 2 parameters:**
1. `query` : `string` \\*_<u>Required</u>_
- The input value that is provided in the `Search Query` Textbox component.
2. `topn_` : `float` _Default 5_
- The input value that is provided in the "Number of Results" Slider component.
#### **Returns 1 element**
`str | float | bool | list | dict`
- The output value that appears in the `Search Results` Json component.
""")
            with gr.TabItem("</> JavaScript"):
                gr.Markdown("""
```javascript
import { Client } from "@gradio/client";
const client = await Client.connect("Remeinium/Embedding_Siyabasa");
const result = await client.predict("/get_embedding", {
word: "අම්මා"
});
console.log(result.data);
```
""")
                web_btn_js = gr.Button("Refer the Complete Javascript API Documentation", elem_id="button_green")
                # Client-side only handler: open the JavaScript section of the docs.
                js_code = f"() => window.open('{js_docs}', '_blank')"
                web_btn_js.click(None, None, None, js=js_code)
            with gr.TabItem("␥ cURL"):
                gr.Markdown("""
```bash
curl -X POST https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding \\
-H "Content-Type: application/json" \\
-d '{"data": ["අම්මා"]}' | awk -F'"' '{ print $4}' | read EVENT_ID; \\
curl -N https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding/$EVENT_ID
```
""")
                web_btn_cu = gr.Button("Refer the Complete cURL API Documentation", elem_id="button_green")
                js_code = f"() => window.open('{cu_docs}', '_blank')"
                web_btn_cu.click(None, None, None, js=js_code)
        # Model Information
        gr.Markdown("## Model Details")
        gr.Markdown("""
| Property | Description |
|----------|-------------|
| **Model**| Embedding_Siyabasa API<br>`UgannA_SiyabasaV2` |
| **Supported data types**<br>Input<br>Output | <br>Text<br>Text embeddings |
| **Token limits**<br>Input token limit<br>Output dimension size | <br>1000<br>300 |
| **Version**<br>Model<br>API | <br>V_2.0<br>V_1.0|
| **Latest update** | August 2025 |
| **Language** | `Sinhala` only |
""")
        # Usage and Limits
        gr.Markdown("## Usage and limits")
        gr.Markdown("""
- **Always Free**: Unlimited requests (subject to fair usage)
- **Rate limits**: Applied only during high traffic to ensure service stability
""")
        # Support
        gr.Markdown("## Support")
        gr.Markdown("""
- **Read Official <a href="https://esdocs.ai.remeinium.com" target="_blank">Documentation</a>.**
- **Technical support**: support@remeinium.com
- **Bug reports**: Create an issue in the Space discussions
- **Feature requests**: Contact support@remeinium.com
> **Note**: This API is designed specifically for **Sinhala** language processing and **may not work with other languages.**
""")
        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button_green")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)
# -------------------------
# Main Application
# -------------------------
# Top-level UI: intro, link buttons, then the Playground / API Platform /
# Status tabs. Every playground button calls one of the embedding functions
# defined above and shows the returned dict in a JSON component.
with gr.Blocks(title="Sinhala Embeddings API", css=styles) as demo:
    gr.Markdown("""
# 🇱🇰 Embedding_Siyabasa - Sinhala | An Advanced Embeddings API for Sinhala Language
## Welcome to the official HuggingFace Space for _Embedding Siyabasa_
The `Embedding_Siyabasa API` provides high-quality text embedding models specifically designed for the `Sinhala` language. Generate embeddings for Sinhala words, phrases, and sentences using our latest model `UgannA_SiyabasaV2`. These language-specific embeddings power advanced **NLP tasks such as semantic search, text classification, and document clustering**, delivering more accurate and context-aware results than traditional keyword-based approaches.
Get the Model (`UgannA_SiyabasaV2`): https://huggingface.co/Remeinium/UgannA_SiyabasaV2
**Key features:**
- **Language-specific**: Optimized exclusively for Sinhala text
- **300-dimensional embeddings**: Rich semantic representations
- **FastText architecture**: Proven performance for morphologically rich languages
""")
    with gr.Row():
        # These buttons run no Python; their click handlers are JavaScript
        # snippets that open a URL in a new browser tab.
        web_btn = gr.Button("Refer the Complete API Documentation", elem_id="button_green")
        js_code = f"() => window.open('{docs}', '_blank')"
        web_btn.click(None, None, None, js=js_code)
        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)
    with gr.Tabs():
        # Playground
        with gr.TabItem("🧩 Embedding Playground"):
            gr.Markdown("## Explore Model Capabilities")
            gr.Markdown("Test the model directly without API access requirements.")
            # Word Embedding
            with gr.Row():
                inp = gr.Textbox(label="Sinhala Word", placeholder="අම්මා, සියබස, නූතන")
                out = gr.JSON(label="Embedding Vector")
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=inp, outputs=out, fn=get_embedding, cache_examples=True
            )
            btn = gr.Button("Get Embedding", elem_id="button")
            btn.click(fn=get_embedding, inputs=inp, outputs=out)
            # Word Similarity
            gr.Markdown("### Word Similarity")
            with gr.Row():
                ws_a = gr.Textbox(label="Word A", placeholder="අම්මා")
                ws_b = gr.Textbox(label="Word B", placeholder="තාත්තා")
                ws_out = gr.JSON(label="Similarity Result")
            ws_btn = gr.Button("Compare Words", elem_id="button")
            ws_btn.click(fn=word_similarity, inputs=[ws_a, ws_b], outputs=ws_out)
            # Nearest Neighbors
            gr.Markdown("### Semantic Search")
            with gr.Row():
                nn_word = gr.Textbox(label="Query Word", placeholder="පෞරාණික")
                # step=1 keeps the requested neighbour count a whole number.
                nn_k = gr.Slider(1, 50, 10, step=1, label="Number of Results")
                nn_out = gr.JSON(label="Similar Words")
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=nn_word, outputs=nn_out, fn=nearest_neighbors, cache_examples=True
            )
            nn_btn = gr.Button("Find Similar Words", elem_id="button")
            nn_btn.click(fn=nearest_neighbors, inputs=[nn_word, nn_k], outputs=nn_out)
            # Sentence Operations
            gr.Markdown("### Sentence Operations")
            with gr.Row():
                sent_inp = gr.Textbox(label="Sinhala Sentence", placeholder="මම පාසලට යමි")
                sent_out = gr.JSON(label="Sentence Embedding")
            gr.Examples(
                examples=[["මම පාසලට යමි"], ["ආරෝග්‍යා පරමා ලාභා"], ["ඔබට බොහොම ස්තුතියි."]],
                inputs=sent_inp, outputs=sent_out, fn=sentence_embedding, cache_examples=True
            )
            sent_btn = gr.Button("Get Sentence Embedding", elem_id="button")
            sent_btn.click(fn=sentence_embedding, inputs=sent_inp, outputs=sent_out)
            with gr.Row():
                sa = gr.Textbox(label="Sentence A", placeholder="මම පාසලට යමි")
                sb = gr.Textbox(label="Sentence B", placeholder="ඔහු පාසලට යයි")
                ssim_out = gr.JSON(label="Sentence Similarity")
            ssim_btn = gr.Button("Compare Sentences", elem_id="button")
            ssim_btn.click(fn=sentence_similarity, inputs=[sa, sb], outputs=ssim_out)
            # Document Search
            gr.Markdown("### Document Semantic Search")
            gr.Markdown("Upload a text file (one document per line) for semantic search.")
            status_display = gr.Textbox(label="Status", value="Ready to upload documents", interactive=False)
            with gr.Row():
                upload = gr.File(label="Upload .txt or .csv File", file_count="single")
                docs_list = gr.Dataframe(headers=["Document Preview"], interactive=False)
            idx_btn = gr.Button("Index Documents", elem_id="button")
            # Holds the index dict produced by _index_upload (None until then).
            indexed_state = gr.State(value=None)

            def _index_upload(file):
                """Parse and index the uploaded file.

                Returns ``(index or None, dataframe update, status text)``;
                on any failure the index is ``None`` and the preview is
                cleared.
                """
                if file is None:
                    return None, gr.update(value=[]), "Please upload a file first"
                parsed = parse_uploaded_documents(file)
                if "error" in parsed:
                    return None, gr.update(value=[]), parsed["error"]
                docs = parsed["documents"]
                indexed = index_documents_for_search(docs)
                if "error" in indexed:
                    return None, gr.update(value=[]), indexed["error"]
                # Preview: first 20 documents, each cut to 200 characters.
                preview = [[(d[:200] + "..." if len(d) > 200 else d)] for d in docs[:20]]
                return indexed, gr.update(value=preview), f"Indexed {len(docs)} documents"

            idx_btn.click(_index_upload, inputs=[upload], outputs=[indexed_state, docs_list, status_display])
            with gr.Row():
                q = gr.Textbox(label="Search Query")
                # step=1 keeps the requested result count a whole number.
                topn = gr.Slider(1, 20, 5, step=1, label="Number of Results")
                results_out = gr.JSON(label="Search Results")

            def _search_wrapper(query, topn_, state):
                """Search the stored index and trim the results to ``topn_`` entries."""
                if state is None:
                    return {"error": "Please index documents first"}
                res = search_documents(query, state)
                if "results" in res:
                    res["results"] = res["results"][:int(topn_)]
                return res

            search_btn = gr.Button("Search Documents", elem_id="button")
            search_btn.click(fn=_search_wrapper, inputs=[q, topn, indexed_state], outputs=[results_out])
        # API Platform Tab
        with gr.TabItem("⚡ API Platform"):
            create_api_platform()
        with gr.TabItem("💡 Status"):
            # gr.Markdown("Check at : https://stats.uptimerobot.com/HZFBOsSvBT")
            web_btn_status = gr.Button("Check Status", elem_id="button")
            js_code = f"() => window.open('{status}', '_blank')"
            web_btn_status.click(None, None, None, js=js_code)
    gr.Markdown("""
---
*✨ **<a href="https://ai.remeinium.com" target="_blank">Remeinium AI</a>** · _Intelligence for a greater tomorrow._*
""")

if __name__ == "__main__":
    # demo.queue(default_concurrency_limit=10, max_size=20).launch()
    demo.launch()