# app.py
#
# Gradio Space exposing the Sinhala FastText model UgannA_SiyabasaV2 as an
# interactive playground plus a documented API platform.

import gradio as gr
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi
import fasttext
import os
import numpy as np
from functools import lru_cache
import json
import time
from typing import List, Tuple, Optional, Dict, Any
from collections import defaultdict, deque
import hashlib
import uuid
import tempfile
import requests
import webbrowser

# -------------------------
# Styles
# -------------------------
styles = """
body{
    background : #161616;
}
#button {
    background: linear-gradient(to right, #6A359C, #B589D6);
    color: #efefef;
    font-weight: 600;
    border: none;
    border-radius: 8px;
    margin : 8px auto;
    transition: all 0.3s ease;
}
#button_green {
    background: linear-gradient(to right, #18de78, #50eb9b);
    color: #efefef;
    font-weight: 600;
    border: none;
    width: 50%;
    color : #1d1d1d;
    margin : 8px auto;
    border-radius: 8px;
    transition: all 0.3s ease;
}
#button:hover {
    background: linear-gradient(to right, #5A2D8C, #A579C6);
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(106, 53, 156, 0.3);
}
a{
    color : #1baaf2;
    text-decoration: none;
}
.normal-text{
    font-size: 25px;
}
"""

# -------------------------
# Website References
# -------------------------
website = 'https://ai.remeinium.com'
docs = 'https://esdocs.ai.remeinium.com'
js_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#javascript'
cu_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#curl'
status = 'https://stats.uptimerobot.com/HZFBOsSvBT'
# BUGFIX: was named `model`, which the model-loading section below shadows with
# the loaded FastText object; also corrected huggingface.com -> huggingface.co
# (the canonical domain, matching the link used in the UI welcome text).
model_url = 'https://huggingface.co/Remeinium/UgannA_SiyabasaV2'

# -------------------------
# Model Loading
# -------------------------
# The token is required to download the model binary; fail fast with a clear
# message instead of a confusing download error later.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise EnvironmentError("HF_TOKEN is not set. Please add it in Space Settings > Secrets.")
try:
    print("Downloading UgannA_SiyabasaV2 model...")
    model_path = hf_hub_download(
        repo_id="Remeinium/UgannA_SiyabasaV2",
        filename="UgannA_SiyabasaV2.bin",
        token=HF_TOKEN,
        repo_type="model"
    )
    model = fasttext.load_model(model_path)
    print("Model loaded successfully!")
    # Static metadata surfaced by the API docs / UI.
    MODEL_INFO = {
        "name": "UgannA_SiyabasaV2",
        "version": "2.0",
        "dimensions": model.get_dimension(),
        "vocabulary_size": len(model.get_words()),
        "language": "Sinhala",
        "architecture": "FastText"
    }
except Exception as e:
    # Chain the original exception so the real download/load failure is
    # preserved in the traceback, not just its string form.
    raise RuntimeError(f"Failed to load model: {str(e)}") from e


# -------------------------
# Rate Limiting
# -------------------------
class RateLimiter:
    """Sliding-window rate limiter keyed by client id (or user id when given).

    Keeps a deque of accepted-request timestamps per identifier and enforces
    per-minute / per-hour / per-day ceilings.
    """

    def __init__(self):
        # identifier -> deque of timestamps of accepted requests (seconds).
        self.requests = defaultdict(deque)
        # NOTE(review): currently unused anywhere in this class — kept for
        # interface stability; confirm before removing.
        self.user_limits = defaultdict(deque)
        # limits
        self.limits = {
            "per_minute": 120,
            "per_hour": 2000,
            "per_day": 100000
        }

    def check_limit(self, client_id: str, user_id: Optional[str] = None) -> Tuple[bool, Dict[str, Any]]:
        """Return (allowed, info).

        On denial `info` carries the violated limit, the current count and an
        estimated `reset_in` (seconds). On success the request timestamp is
        recorded and daily usage counters are returned.
        """
        now = time.time()
        identifier = user_id if user_id else client_id
        client_requests = self.requests[identifier]

        # Clean old requests (24 hour window) — keeps the deque bounded.
        while client_requests and client_requests[0] < now - 86400:
            client_requests.popleft()

        current_count = len(client_requests)

        # Check daily limit
        if current_count >= self.limits["per_day"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_day"],
                "current": current_count,
                "reset_in": 86400 - (now - client_requests[0]) if client_requests else 86400
            }

        # Check hourly limit
        hourly_requests = [req for req in client_requests if req > now - 3600]
        if len(hourly_requests) >= self.limits["per_hour"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_hour"],
                "current": len(hourly_requests),
                "reset_in": 3600 - (now - hourly_requests[0]) if hourly_requests else 3600
            }

        # Check minute-level
        minute_requests = [req for req in client_requests if req > now - 60]
        if len(minute_requests) >= self.limits["per_minute"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_minute"],
                "current": len(minute_requests),
                "reset_in": 60 - (now - minute_requests[0]) if minute_requests else 60
            }

        # Allow request
        client_requests.append(now)
        return True, {
            "allowed": True,
            "limits": self.limits,
            "current_daily": current_count + 1,
            "remaining_daily": self.limits["per_day"] - current_count - 1
        }
rate_limiter = RateLimiter()


# -------------------------
# Core Embedding Functions
# -------------------------
def enhanced_embedding_response(original_result, text, endpoint_type="word"):
    """Attach model metadata and shape the payload per endpoint type.

    Error dicts pass through untouched. For "word"/"sentence" a new dict with
    a fixed key layout is returned; other endpoint types get the metadata
    merged in place and the original dict back.
    """
    if "error" in original_result:
        return original_result

    # Common metadata
    original_result["model"] = "UgannA_SiyabasaV2"
    original_result["language"] = "Sinhala"
    # BUGFIX: previously this unconditionally overwrote the dimension count
    # the caller measured from the actual vector with a hard-coded 300.
    # Keep the measured value when present; 300 is only a fallback.
    original_result.setdefault("dimensions", 300)

    # Format based on endpoint type
    if endpoint_type == "word":
        return {
            "text": text,
            "embedding": original_result.get("embedding", []),
            "dimensions": original_result["dimensions"],
            "model": original_result["model"],
            "language": original_result["language"]
        }
    elif endpoint_type == "sentence":
        return {
            "sentence": text,
            "embedding": original_result.get("embedding", []),
            "dimensions": original_result["dimensions"],
            "tokens": original_result.get("tokens", []),
            "token_count": original_result.get("token_count", 0),
            "model": original_result["model"],
            "language": original_result["language"]
        }
    else:
        # For similarity and neighbors
        return original_result


def safe_strip(s: Optional[str]) -> str:
    """Strip whitespace, mapping None to the empty string."""
    return "" if s is None else s.strip()


@lru_cache(maxsize=1)
def load_vocab_and_matrix(max_words: int = 500000):
    """Materialise (words, matrix, row-normalised matrix) for the vocabulary.

    Cached (maxsize=1) because building the matrix is expensive; the
    normalised copy lets cosine similarity reduce to a dot product.
    """
    try:
        words = model.get_words()[:max_words]
        vectors = [model.get_word_vector(w) for w in words]
        mat = np.vstack(vectors).astype(np.float32)
        norms = np.linalg.norm(mat, axis=1, keepdims=True)
        norms[norms == 0.0] = 1.0  # avoid division by zero for zero vectors
        mat_norm = mat / norms
        return words, mat, mat_norm
    except Exception as exc:
        # Chain the cause so the real failure is visible in the traceback.
        raise RuntimeError("Failed to load vocabulary matrix") from exc


def cosine_similarity_vec(u: np.ndarray, mat_norm: np.ndarray) -> np.ndarray:
    """Cosine similarity of `u` against every row of a row-normalised matrix.

    Returns an all-zero vector when `u` has zero norm (undefined cosine).
    """
    u_norm = np.linalg.norm(u)
    if u_norm == 0:
        return np.zeros(mat_norm.shape[0], dtype=np.float32)
    u = (u / u_norm).astype(np.float32)
    return np.dot(mat_norm, u)
def get_embedding(word: str) -> Dict[str, Any]:
    """Return the embedding vector for a single Sinhala word."""
    word = safe_strip(word)
    if not word:
        return {"error": "Please provide a Sinhala word"}
    try:
        emb = model.get_word_vector(word)
        base_result = {
            "word": word,
            "embedding": emb.tolist(),
            "dimensions": len(emb)
        }
        return enhanced_embedding_response(base_result, word, "word")
    except Exception as e:
        return {"error": f"Failed to generate embedding: {str(e)}"}


def word_similarity(word1: str, word2: str) -> Dict[str, Any]:
    """Cosine similarity between two Sinhala words (0.0 if either is zero-norm)."""
    word1, word2 = safe_strip(word1), safe_strip(word2)
    if not word1 or not word2:
        return {"error": "Both words are required"}
    try:
        v1, v2 = model.get_word_vector(word1), model.get_word_vector(word2)
        denom = (np.linalg.norm(v1) * np.linalg.norm(v2))
        similarity = float(np.dot(v1, v2) / denom) if denom != 0 else 0.0
        base_result = {
            "word1": word1,
            "word2": word2,
            "similarity": round(similarity, 6)
        }
        return enhanced_embedding_response(base_result, f"{word1} vs {word2}", "similarity")
    except Exception as e:
        return {"error": f"Similarity computation failed: {str(e)}"}


def nearest_neighbors(word: str, top_k: int = 10) -> Dict[str, Any]:
    """Return the `top_k` most similar vocabulary words for `word`.

    The query word itself is filtered out of the results.
    """
    word = safe_strip(word)
    if not word:
        return {"error": "Word input required"}
    try:
        # BUGFIX: Gradio sliders deliver floats; slicing below requires an int.
        top_k = int(top_k)
        words, mat, mat_norm = load_vocab_and_matrix()
        vec = model.get_word_vector(word)
        sims = cosine_similarity_vec(vec, mat_norm)
        # +1 so the query word can be dropped and top_k results still remain.
        indices = np.argsort(-sims)[:top_k + 1]
        results = []
        for i in indices:
            neighbor = words[i]
            score = float(sims[i])
            if neighbor != word:
                results.append({"word": neighbor, "similarity": round(score, 6)})
            if len(results) >= top_k:
                break
        base_result = {
            "query": word,
            "neighbors": results
        }
        return enhanced_embedding_response(base_result, word, "neighbors")
    except Exception as e:
        return {"error": f"Neighbor search failed: {str(e)}"}


def sentence_embedding(sentence: str) -> Dict[str, Any]:
    """Embed a sentence as the mean of its whitespace-token word vectors."""
    sentence = safe_strip(sentence)
    if not sentence:
        return {"error": "Sentence input required"}
    try:
        tokens = [t for t in sentence.split() if t.strip()]
        if not tokens:
            return {"error": "No valid tokens found"}
        vectors = [model.get_word_vector(token) for token in tokens]
        avg_vector = np.mean(vectors, axis=0)
        base_result = {
            "sentence": sentence,
            "embedding": avg_vector.tolist(),
            "tokens": tokens,
            "token_count": len(tokens)
        }
        return enhanced_embedding_response(base_result, sentence, "sentence")
    except Exception as e:
        return {"error": f"Sentence embedding failed: {str(e)}"}
def sentence_similarity(sentence1: str, sentence2: str) -> Dict[str, Any]:
    """Cosine similarity between the mean-vector embeddings of two sentences."""
    try:
        emb1 = sentence_embedding(sentence1)
        emb2 = sentence_embedding(sentence2)
        if "error" in emb1 or "error" in emb2:
            # Report the first error encountered (emb1's takes precedence).
            return {"error": emb1.get("error", emb2.get("error"))}
        v1 = np.array(emb1["embedding"])
        v2 = np.array(emb2["embedding"])
        denom = (np.linalg.norm(v1) * np.linalg.norm(v2))
        similarity = float(np.dot(v1, v2) / denom) if denom != 0 else 0.0
        base_result = {
            "sentence1": sentence1,
            "sentence2": sentence2,
            "similarity": round(similarity, 6)
        }
        return enhanced_embedding_response(base_result, f"{sentence1} vs {sentence2}", "sentence_similarity")
    except Exception as e:
        return {"error": f"Sentence similarity failed: {str(e)}"}


# -------------------------
# Document Search
# -------------------------
def parse_uploaded_documents(file):
    """Read an uploaded txt/csv file and split it into one document per line.

    Accepts a Gradio file object (uses `.name`) or a plain path. Falls back
    to latin-1 when the file is not valid UTF-8. Returns {"documents": [...]}
    or {"error": ...}.
    """
    if file is None:
        return {"error": "Please upload a file (txt/csv)."}
    try:
        if hasattr(file, 'name'):
            file_path = file.name
        else:
            file_path = str(file)
        with open(file_path, 'r', encoding='utf-8') as f:
            raw = f.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as f:
                raw = f.read()
        except Exception as e:
            return {"error": f"Encoding error: {str(e)}"}
    except Exception as e:
        return {"error": f"File reading error: {str(e)}"}

    # The original code special-cased comma-heavy (CSV-ish) content, but both
    # branches were identical — every non-blank line is one document.
    docs = [line.strip() for line in raw.splitlines() if line.strip()]

    if not docs:
        return {"error": "No documents found in the file"}
    return {"documents": docs}
def index_documents_for_search(docs: List[str]):
    """Build an average-of-word-vectors index over `docs`.

    Returns {"matrix", "matrix_norm", "docs"} (raw vectors, row-normalised
    vectors for cosine search, original texts) or {"error": ...}.
    """
    if not docs:
        return {"error": "The file was empty"}
    try:
        vecs = []
        for d in docs:
            tokens = [t for t in d.split() if t.strip()]
            if not tokens:
                # Token-less document -> zero vector keeps row alignment.
                vecs.append(np.zeros((model.get_dimension(),), dtype=np.float32))
                continue
            mats = np.vstack([model.get_word_vector(t) for t in tokens])
            vecs.append(mats.mean(axis=0))
        M = np.vstack(vecs).astype(np.float32)
        norms = np.linalg.norm(M, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # avoid division by zero for zero vectors
        M_norm = M / norms
        return {"matrix": M, "matrix_norm": M_norm, "docs": docs}
    except Exception as e:
        return {"error": f"Error while data indexing: {str(e)}"}


def search_documents(query: str, indexed):
    """Cosine-search `indexed` (from index_documents_for_search) for `query`.

    Returns the top 10 documents with scores, or {"error": ...}.
    """
    q = safe_strip(query)
    if not q:
        return {"error": "Enter a query to search"}
    try:
        q_tokens = [t for t in q.split() if t.strip()]
        if not q_tokens:
            return {"error": "Couldn't extract tokens from query"}
        q_vecs = np.vstack([model.get_word_vector(t) for t in q_tokens])
        q_avg = q_vecs.mean(axis=0)
        q_norm = np.linalg.norm(q_avg)
        if q_norm == 0:
            # Zero-norm query: cosine undefined, report zero similarity.
            sims = np.zeros(indexed["matrix_norm"].shape[0], dtype=np.float32)
        else:
            q_avg = (q_avg / q_norm).astype(np.float32)
            sims = np.dot(indexed["matrix_norm"], q_avg)
        idx = np.argsort(-sims)[:10]
        results = []
        for i in idx:
            results.append({"document": indexed["docs"][i], "score": float(round(sims[i], 6))})
        return {"query": q, "results": results}
    except Exception as e:
        return {"error": f"Search failed: {str(e)}"}


# -------------------------
# API Platform
# -------------------------
def create_api_platform():
    """Render the static API-documentation tab (markdown only, no endpoints)."""
    with gr.Column():
        # Quick Start Section
        gr.Markdown("## Quick start")
        gr.Markdown("Get started with the `Embedding_Siyabasa API` in minutes.")

        with gr.Tabs():
            with gr.TabItem("🐍 Python"):
                gr.Markdown("""
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="අම්මා",
    api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```
""")
                gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _\\*Required_ - The input value that is provided in the "Sinhala Word" Textbox component.

#### **Returns 1 element**
- `str | float | bool | list | dict` - The output value that appears in the "Embedding Vector" Json component.
""")

                # API Endpoints Section
                gr.Markdown("## API endpoints")

                # Word Embedding Endpoint
                with gr.Accordion("GET WORD EMBEDDING", open=True):
                    gr.Markdown("""
Get the embedding vector for a Sinhala word.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="අම්මා",
    api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "text": "අම්මා",
    "embedding": [0.123, -0.456, 0.789, ...],
    "dimensions": 300,
    "model": "UgannA_SiyabasaV2",
    "language": "Sinhala"
}
```
""")
                    gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _\\*Required_ - The input value that is provided in the "Sinhala Word" Textbox component.

#### **Returns 1 element**
- `str | float | bool | list | dict` - The output value that appears in the "Embedding Vector" Json component.
""")

                # Word Similarity Endpoint
                with gr.Accordion("GET WORD SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala words.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word1="අම්මා",
    word2="තාත්තා",
    api_name="/word_similarity"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "word1": "අම්මා",
    "word2": "තාත්තා",
    "similarity": 0.856234,
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word1` : `string` \\*_Required_ - The input value that is provided in the "Word 1" Textbox component.
2. `word2` : `string` \\*_Required_ - The input value that is provided in the "Word 2" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Similarity Result" Json component.
""")

                # Nearest Neighbors Endpoint
                with gr.Accordion("GET NEAREST NEIGHBORS", open=False):
                    gr.Markdown("""
Find semantically similar words for a given Sinhala word.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="පෞරාණික",
    top_k=5,
    api_name="/nearest_neighbors"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "query": "පෞරාණික",
    "neighbors": [
        {"word": "ඉපැරණි", "similarity": 0.755...},
        {"word": "පුරාවිද්යාත්මක", "similarity": 0.749...},
        ...
    ],
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word` : `str` \\*_Required_ - The input value that is provided in the "Query Word" Textbox component.
2. `top_k` : `float` _Default: 10_ - The input value that is provided in the "Number of Results" Slider component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Similar Words" Json component.""")

                # Sentence Embedding Endpoint
                with gr.Accordion("GET SENTENCE EMBEDDING", open=False):
                    gr.Markdown("""
Get the embedding vector for a Sinhala sentence.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    sentence="මම පාසලට යමි",
    api_name="/sentence_embedding"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "sentence": "මම පාසලට යමි",
    "embedding": [0.123, -0.456, 0.789, ...],
    "dimensions": 300,
    "tokens": ["මම", "පාසලට", "යමි"],
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### Accepts 1 parameter:
- `sentence` : `str` \\*_Required_ - The input value that is provided in the "Sinhala Sentence" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Sentence Embedding" Json component.
""")

                # Sentence Similarity Endpoint
                with gr.Accordion("GET SENTENCE SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala sentences.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    sentence1="මම පාසලට යමි",
    sentence2="ඔහු පාසලට යයි",
    api_name="/sentence_similarity"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "sentence1": "මම පාසලට යමි",
    "sentence2": "ඔහු පාසලට යයි",
    "similarity": 0.734567,
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
**Accepts 2 parameters:**
1. `sentence1` : `str` \\*_Required_ - The input value that is provided in the "Sentence A" Textbox component.
2. `sentence2` : `str` \\*_Required_ - The input value that is provided in the "Sentence B" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Sentence Similarity" Json component.
""")

                # Document Search Endpoints
                with gr.Accordion("DOCUMENT SEARCH", open=False):
                    gr.Markdown("""
Upload documents and perform semantic search.

**Step 1: Index documents**
```python
from gradio_client import Client, handle_file

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    file=handle_file('path/to/documents.txt'),
    api_name="/_index_upload"
)
print(json.dumps(result, indent=4))
```
""")
                    gr.Markdown("""
#### **Accepts 1 parameter:**
1. `file` : `filepath` \\*_Required_ The input value that is provided in the "Upload .txt or .csv File" File component. The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.

Attributes:
path: The server file path where the file is stored.
url: The normalized server URL pointing to the file.
size: The size of the file in bytes.
orig_name: The original filename before upload.
mime_type: The MIME type of the file.
is_stream: Indicates whether the file is a stream.
meta: Additional metadata used internally (should not be changed).

#### **Returns tuple of 2 elements**
1. `dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)` - The output value that appears in the `value_45` Dataframe component.
2. `str` - The output value that appears in the "Status" Textbox component.
""")
                    gr.Markdown("""
**Step 2: Search documents**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    query="සිංහල භාෂාව",
    topn_=5,
    api_name="/_search_wrapper"
)
print(json.dumps(result, indent=4))
```
""")
                    gr.Markdown("""
### **Accepts 2 parameters:**
1. `query` : `string` \\*_Required_ - The input value that is provided in the `Search Query` Textbox component.
2. `topn_` : `float` _Default 5_ - The input value that is provided in the "Number of Results" Slider component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the `Search Results` Json component.
""")

            with gr.TabItem(" JavaScript"):
                gr.Markdown("""
```javascript
import { Client } from "@gradio/client";

const client = await Client.connect("Remeinium/Embedding_Siyabasa");
const result = await client.predict("/get_embedding", {
    word: "අම්මා"
});
console.log(result.data);
```
""")
                web_btn_js = gr.Button("Refer the Complete Javascript API Documentation", elem_id="button_green")
                # BUGFIX: this button previously opened cu_docs (the cURL
                # anchor); it must open the JavaScript documentation anchor.
                js_code = f"() => window.open('{js_docs}', '_blank')"
                web_btn_js.click(None, None, None, js=js_code)

            with gr.TabItem("␥ cURL"):
                gr.Markdown("""
```bash
curl -X POST https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding \\
    -H "Content-Type: application/json" \\
    -d '{"data": ["අම්මා"]}' | awk -F'"' '{ print $4}' | read EVENT_ID; \\
curl -N https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding/$EVENT_ID
```
""")
                web_btn_cu = gr.Button("Refer the Complete cURL API Documentation", elem_id="button_green")
                js_code = f"() => window.open('{cu_docs}', '_blank')"
                web_btn_cu.click(None, None, None, js=js_code)

        # Model Information
        # NOTE: table cells joined with <br> — raw newlines inside cells break
        # GitHub-flavored-markdown table rendering.
        gr.Markdown("## Model Details")
        gr.Markdown("""
| Property | Description |
|----------|-------------|
| **Model** | Embedding_Siyabasa API<br>`UgannA_SiyabasaV2` |
| **Supported data types** | Input: Text<br>Output: Text embeddings |
| **Token limits** | Input token limit: 1000<br>Output dimension size: 300 |
| **Version** | Model: V_2.0<br>API: V_1.0 |
| **Latest update** | August 2025 |
| **Language** | `Sinhala` only |
""")

        # Usage and Limits
        gr.Markdown("## Usage and limits")
        gr.Markdown("""
- **Always Free**: Unlimited requests (subject to fair usage)
- **Rate limits**: Applied only during high traffic to ensure service stability
""")

        # Support
        gr.Markdown("## Support")
        gr.Markdown("""
- **Read Official Documentation.**
- **Technical support**: support@remeinium.com
- **Bug reports**: Create an issue in the Space discussions
- **Feature requests**: Contact support@remeinium.com

> **Note**: This API is designed specifically for **Sinhala** language processing and **may not work with other languages.**
""")

        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button_green")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)
# -------------------------
# Main Application
# -------------------------
# Top-level Blocks UI: welcome header, playground tab (wired to the embedding
# functions above), static API-docs tab, and an uptime-status tab.
with gr.Blocks(title="Sinhala Embeddings API", css=styles) as demo:
    gr.Markdown("""
# 🇱🇰 Embedding_Siyabasa - Sinhala | An Advanced Embeddings API for Sinhala Language
## Welcome to the official HuggingFace Space for _Embedding Siyabasa_

The `Embedding_Siyabasa API` provides high-quality text embedding models specifically designed for the `Sinhala` language. Generate embeddings for Sinhala words, phrases, and sentences using our latest model `UgannA_SiyabasaV2`. These language-specific embeddings power advanced **NLP tasks such as semantic search, text classification, and document clustering**, delivering more accurate and context-aware results than traditional keyword-based approaches.

Get the Model (`UgannA_SiyabasaV2`): https://huggingface.co/Remeinium/UgannA_SiyabasaV2

**Key features:**
- **Language-specific**: Optimized exclusively for Sinhala text
- **300-dimensional embeddings**: Rich semantic representations
- **FastText architecture**: Proven performance for morphologically rich languages
""")

    # External-link buttons: click handlers run pure client-side JS (no
    # backend fn), opening the docs / website in a new tab.
    with gr.Row():
        web_btn = gr.Button("Refer the Complete API Documentation", elem_id="button_green")
        js_code = f"() => window.open('{docs}', '_blank')"
        web_btn.click(None, None, None, js=js_code)
        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)

    with gr.Tabs():
        # Playground
        with gr.TabItem("🧩 Embedding Playground"):
            gr.Markdown("## Explore Model Capabilities")
            gr.Markdown("Test the model directly without API access requirements.")

            # Word Embedding
            with gr.Row():
                inp = gr.Textbox(label="Sinhala Word", placeholder="අම්මා, සියබස, නූතන")
                out = gr.JSON(label="Embedding Vector")
            # cache_examples=True runs get_embedding for each example at
            # startup, so cached results require the model to be loaded.
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=inp,
                outputs=out,
                fn=get_embedding,
                cache_examples=True
            )
            btn = gr.Button("Get Embedding", elem_id="button")
            btn.click(fn=get_embedding, inputs=inp, outputs=out)

            # Word Similarity
            gr.Markdown("### Word Similarity")
            with gr.Row():
                ws_a = gr.Textbox(label="Word A", placeholder="අම්මා")
                ws_b = gr.Textbox(label="Word B", placeholder="තාත්තා")
            ws_out = gr.JSON(label="Similarity Result")
            ws_btn = gr.Button("Compare Words", elem_id="button")
            ws_btn.click(fn=word_similarity, inputs=[ws_a, ws_b], outputs=ws_out)

            # Nearest Neighbors
            gr.Markdown("### Semantic Search")
            with gr.Row():
                nn_word = gr.Textbox(label="Query Word", placeholder="පෞරාණික")
                nn_k = gr.Slider(1, 50, 10, label="Number of Results")
            nn_out = gr.JSON(label="Similar Words")
            # Examples pass only the word; nearest_neighbors falls back to its
            # default top_k for cached example runs.
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=nn_word,
                outputs=nn_out,
                fn=nearest_neighbors,
                cache_examples=True
            )
            nn_btn = gr.Button("Find Similar Words", elem_id="button")
            nn_btn.click(fn=nearest_neighbors, inputs=[nn_word, nn_k], outputs=nn_out)

            # Sentence Operations
            gr.Markdown("### Sentence Operations")
            with gr.Row():
                sent_inp = gr.Textbox(label="Sinhala Sentence", placeholder="මම පාසලට යමි")
                sent_out = gr.JSON(label="Sentence Embedding")
            gr.Examples(
                examples=[["මම පාසලට යමි"], ["ආරෝග්‍යා පරමා ලාභා"], ["ඔබට බොහොම ස්තුතියි."]],
                inputs=sent_inp,
                outputs=sent_out,
                fn=sentence_embedding,
                cache_examples=True
            )
            sent_btn = gr.Button("Get Sentence Embedding", elem_id="button")
            sent_btn.click(fn=sentence_embedding, inputs=sent_inp, outputs=sent_out)

            with gr.Row():
                sa = gr.Textbox(label="Sentence A", placeholder="මම පාසලට යමි")
                sb = gr.Textbox(label="Sentence B", placeholder="ඔහු පාසලට යයි")
            ssim_out = gr.JSON(label="Sentence Similarity")
            ssim_btn = gr.Button("Compare Sentences", elem_id="button")
            ssim_btn.click(fn=sentence_similarity, inputs=[sa, sb], outputs=ssim_out)

            # Document Search
            gr.Markdown("### Document Semantic Search")
            gr.Markdown("Upload a text file (one document per line) for semantic search.")
            status_display = gr.Textbox(label="Status", value="Ready to upload documents", interactive=False)
            with gr.Row():
                upload = gr.File(label="Upload .txt or .csv File", file_count="single")
                docs_list = gr.Dataframe(headers=["Document Preview"], interactive=False)
            idx_btn = gr.Button("Index Documents", elem_id="button")
            # Per-session holder for the indexed document matrix; None until
            # a file has been indexed.
            indexed_state = gr.State(value=None)

            def _index_upload(file):
                # Parse + index the uploaded file; returns (state, preview
                # rows for the Dataframe, status message).
                if file is None:
                    return None, gr.update(value=[]), "Please upload a file first"
                parsed = parse_uploaded_documents(file)
                if "error" in parsed:
                    return None, gr.update(value=[]), parsed["error"]
                docs = parsed["documents"]
                indexed = index_documents_for_search(docs)
                if "error" in indexed:
                    return None, gr.update(value=[]), indexed["error"]
                # Preview first 20 docs, truncated to 200 chars each.
                preview = [[(d[:200] + "..." if len(d) > 200 else d)] for d in docs[:20]]
                return indexed, gr.update(value=preview), f"Indexed {len(docs)} documents"

            idx_btn.click(_index_upload, inputs=[upload], outputs=[indexed_state, docs_list, status_display])

            with gr.Row():
                q = gr.Textbox(label="Search Query")
                topn = gr.Slider(1, 20, 5, label="Number of Results")
            results_out = gr.JSON(label="Search Results")

            def _search_wrapper(query, topn_, state):
                # Guard: search is only valid after _index_upload has run.
                if state is None:
                    return {"error": "Please index documents first"}
                res = search_documents(query, state)
                if "results" in res:
                    # search_documents returns up to 10; trim to the slider value.
                    res["results"] = res["results"][:int(topn_)]
                return res

            search_btn = gr.Button("Search Documents", elem_id="button")
            search_btn.click(fn=_search_wrapper, inputs=[q, topn, indexed_state], outputs=[results_out])

        # API Platform Tab
        with gr.TabItem("⚡ API Platform"):
            create_api_platform()

        with gr.TabItem("💡 Status"):
            # gr.Markdown("Check at : https://stats.uptimerobot.com/HZFBOsSvBT")
            web_btn_status = gr.Button("Check Status", elem_id="button")
            js_code = f"() => window.open('{status}', '_blank')"
            web_btn_status.click(None, None, None, js=js_code)

    gr.Markdown("""
---
*✨ **Remeinium AI** · _Intelligence for a greater tomorrow._*
""")

if __name__ == "__main__":
    # demo.queue(default_concurrency_limit=10, max_size=20).launch()
    demo.launch()