# app.py
#
# Gradio Space exposing the Sinhala FastText model UgannA_SiyabasaV2 as an
# interactive playground plus a documented API platform.

import gradio as gr
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi
import fasttext
import os
import numpy as np
from functools import lru_cache
import json
import time
from typing import List, Tuple, Optional, Dict, Any
from collections import defaultdict, deque
import hashlib
import uuid
import tempfile
import requests
import webbrowser

# -------------------------
# Styles
# -------------------------
styles = """
body{
    background : #161616;
}
#button {
    background: linear-gradient(to right, #6A359C, #B589D6);
    color: #efefef;
    font-weight: 600;
    border: none;
    border-radius: 8px;
    margin : 8px auto;
    transition: all 0.3s ease;
}
#button_green {
    background: linear-gradient(to right, #18de78, #50eb9b);
    color: #efefef;
    font-weight: 600;
    border: none;
    width: 50%;
    color : #1d1d1d;
    margin : 8px auto;
    border-radius: 8px;
    transition: all 0.3s ease;
}
#button:hover {
    background: linear-gradient(to right, #5A2D8C, #A579C6);
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(106, 53, 156, 0.3);
}
a{
    color : #1baaf2;
    text-decoration: none;
}
.normal-text{
    font-size: 25px;
}
"""

# -------------------------
# Website References
# -------------------------
website = 'https://ai.remeinium.com'
docs = 'https://esdocs.ai.remeinium.com'
js_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#javascript'
cu_docs = 'https://esdocs.ai.remeinium.com/api-reference/introduction#curl'
status = 'https://stats.uptimerobot.com/HZFBOsSvBT'
# BUGFIX: was named `model`, which the model-loading section below shadows with
# the loaded FastText object; also corrected huggingface.com -> huggingface.co
# (the canonical domain, matching the link used in the UI welcome text).
model_url = 'https://huggingface.co/Remeinium/UgannA_SiyabasaV2'

# -------------------------
# Model Loading
# -------------------------
# The token is required to download the model binary; fail fast with a clear
# message instead of a confusing download error later.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise EnvironmentError("HF_TOKEN is not set. Please add it in Space Settings > Secrets.")
try:
    print("Downloading UgannA_SiyabasaV2 model...")
    model_path = hf_hub_download(
        repo_id="Remeinium/UgannA_SiyabasaV2",
        filename="UgannA_SiyabasaV2.bin",
        token=HF_TOKEN,
        repo_type="model"
    )
    model = fasttext.load_model(model_path)
    print("Model loaded successfully!")
    # Static metadata surfaced by the API docs / UI.
    MODEL_INFO = {
        "name": "UgannA_SiyabasaV2",
        "version": "2.0",
        "dimensions": model.get_dimension(),
        "vocabulary_size": len(model.get_words()),
        "language": "Sinhala",
        "architecture": "FastText"
    }
except Exception as e:
    # Chain the original exception so the real download/load failure is
    # preserved in the traceback, not just its string form.
    raise RuntimeError(f"Failed to load model: {str(e)}") from e


# -------------------------
# Rate Limiting
# -------------------------
class RateLimiter:
    """Sliding-window rate limiter keyed by client id (or user id when given).

    Keeps a deque of accepted-request timestamps per identifier and enforces
    per-minute / per-hour / per-day ceilings.
    """

    def __init__(self):
        # identifier -> deque of timestamps of accepted requests (seconds).
        self.requests = defaultdict(deque)
        # NOTE(review): currently unused anywhere in this class — kept for
        # interface stability; confirm before removing.
        self.user_limits = defaultdict(deque)
        # limits
        self.limits = {
            "per_minute": 120,
            "per_hour": 2000,
            "per_day": 100000
        }

    def check_limit(self, client_id: str, user_id: Optional[str] = None) -> Tuple[bool, Dict[str, Any]]:
        """Return (allowed, info).

        On denial `info` carries the violated limit, the current count and an
        estimated `reset_in` (seconds). On success the request timestamp is
        recorded and daily usage counters are returned.
        """
        now = time.time()
        identifier = user_id if user_id else client_id
        client_requests = self.requests[identifier]

        # Clean old requests (24 hour window) — keeps the deque bounded.
        while client_requests and client_requests[0] < now - 86400:
            client_requests.popleft()

        current_count = len(client_requests)

        # Check daily limit
        if current_count >= self.limits["per_day"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_day"],
                "current": current_count,
                "reset_in": 86400 - (now - client_requests[0]) if client_requests else 86400
            }

        # Check hourly limit
        hourly_requests = [req for req in client_requests if req > now - 3600]
        if len(hourly_requests) >= self.limits["per_hour"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_hour"],
                "current": len(hourly_requests),
                "reset_in": 3600 - (now - hourly_requests[0]) if hourly_requests else 3600
            }

        # Check minute-level
        minute_requests = [req for req in client_requests if req > now - 60]
        if len(minute_requests) >= self.limits["per_minute"]:
            return False, {
                "allowed": False,
                "limit": self.limits["per_minute"],
                "current": len(minute_requests),
                "reset_in": 60 - (now - minute_requests[0]) if minute_requests else 60
            }

        # Allow request
        client_requests.append(now)
        return True, {
            "allowed": True,
            "limits": self.limits,
            "current_daily": current_count + 1,
            "remaining_daily": self.limits["per_day"] - current_count - 1
        }
rate_limiter = RateLimiter()


# -------------------------
# Core Embedding Functions
# -------------------------
def enhanced_embedding_response(original_result, text, endpoint_type="word"):
    """Attach model metadata and shape the payload per endpoint type.

    Error dicts pass through untouched. For "word"/"sentence" a new dict with
    a fixed key layout is returned; other endpoint types get the metadata
    merged in place and the original dict back.
    """
    if "error" in original_result:
        return original_result

    # Common metadata
    original_result["model"] = "UgannA_SiyabasaV2"
    original_result["language"] = "Sinhala"
    # BUGFIX: previously this unconditionally overwrote the dimension count
    # the caller measured from the actual vector with a hard-coded 300.
    # Keep the measured value when present; 300 is only a fallback.
    original_result.setdefault("dimensions", 300)

    # Format based on endpoint type
    if endpoint_type == "word":
        return {
            "text": text,
            "embedding": original_result.get("embedding", []),
            "dimensions": original_result["dimensions"],
            "model": original_result["model"],
            "language": original_result["language"]
        }
    elif endpoint_type == "sentence":
        return {
            "sentence": text,
            "embedding": original_result.get("embedding", []),
            "dimensions": original_result["dimensions"],
            "tokens": original_result.get("tokens", []),
            "token_count": original_result.get("token_count", 0),
            "model": original_result["model"],
            "language": original_result["language"]
        }
    else:
        # For similarity and neighbors
        return original_result


def safe_strip(s: Optional[str]) -> str:
    """Strip whitespace, mapping None to the empty string."""
    return "" if s is None else s.strip()


@lru_cache(maxsize=1)
def load_vocab_and_matrix(max_words: int = 500000):
    """Materialise (words, matrix, row-normalised matrix) for the vocabulary.

    Cached (maxsize=1) because building the matrix is expensive; the
    normalised copy lets cosine similarity reduce to a dot product.
    """
    try:
        words = model.get_words()[:max_words]
        vectors = [model.get_word_vector(w) for w in words]
        mat = np.vstack(vectors).astype(np.float32)
        norms = np.linalg.norm(mat, axis=1, keepdims=True)
        norms[norms == 0.0] = 1.0  # avoid division by zero for zero vectors
        mat_norm = mat / norms
        return words, mat, mat_norm
    except Exception as exc:
        # Chain the cause so the real failure is visible in the traceback.
        raise RuntimeError("Failed to load vocabulary matrix") from exc


def cosine_similarity_vec(u: np.ndarray, mat_norm: np.ndarray) -> np.ndarray:
    """Cosine similarity of `u` against every row of a row-normalised matrix.

    Returns an all-zero vector when `u` has zero norm (undefined cosine).
    """
    u_norm = np.linalg.norm(u)
    if u_norm == 0:
        return np.zeros(mat_norm.shape[0], dtype=np.float32)
    u = (u / u_norm).astype(np.float32)
    return np.dot(mat_norm, u)
def get_embedding(word: str) -> Dict[str, Any]:
    """Return the embedding vector for a single Sinhala word."""
    word = safe_strip(word)
    if not word:
        return {"error": "Please provide a Sinhala word"}
    try:
        emb = model.get_word_vector(word)
        base_result = {
            "word": word,
            "embedding": emb.tolist(),
            "dimensions": len(emb)
        }
        return enhanced_embedding_response(base_result, word, "word")
    except Exception as e:
        return {"error": f"Failed to generate embedding: {str(e)}"}


def word_similarity(word1: str, word2: str) -> Dict[str, Any]:
    """Cosine similarity between two Sinhala words (0.0 if either is zero-norm)."""
    word1, word2 = safe_strip(word1), safe_strip(word2)
    if not word1 or not word2:
        return {"error": "Both words are required"}
    try:
        v1, v2 = model.get_word_vector(word1), model.get_word_vector(word2)
        denom = (np.linalg.norm(v1) * np.linalg.norm(v2))
        similarity = float(np.dot(v1, v2) / denom) if denom != 0 else 0.0
        base_result = {
            "word1": word1,
            "word2": word2,
            "similarity": round(similarity, 6)
        }
        return enhanced_embedding_response(base_result, f"{word1} vs {word2}", "similarity")
    except Exception as e:
        return {"error": f"Similarity computation failed: {str(e)}"}


def nearest_neighbors(word: str, top_k: int = 10) -> Dict[str, Any]:
    """Return the `top_k` most similar vocabulary words for `word`.

    The query word itself is filtered out of the results.
    """
    word = safe_strip(word)
    if not word:
        return {"error": "Word input required"}
    try:
        # BUGFIX: Gradio sliders deliver floats; slicing below requires an int.
        top_k = int(top_k)
        words, mat, mat_norm = load_vocab_and_matrix()
        vec = model.get_word_vector(word)
        sims = cosine_similarity_vec(vec, mat_norm)
        # +1 so the query word can be dropped and top_k results still remain.
        indices = np.argsort(-sims)[:top_k + 1]
        results = []
        for i in indices:
            neighbor = words[i]
            score = float(sims[i])
            if neighbor != word:
                results.append({"word": neighbor, "similarity": round(score, 6)})
            if len(results) >= top_k:
                break
        base_result = {
            "query": word,
            "neighbors": results
        }
        return enhanced_embedding_response(base_result, word, "neighbors")
    except Exception as e:
        return {"error": f"Neighbor search failed: {str(e)}"}


def sentence_embedding(sentence: str) -> Dict[str, Any]:
    """Embed a sentence as the mean of its whitespace-token word vectors."""
    sentence = safe_strip(sentence)
    if not sentence:
        return {"error": "Sentence input required"}
    try:
        tokens = [t for t in sentence.split() if t.strip()]
        if not tokens:
            return {"error": "No valid tokens found"}
        vectors = [model.get_word_vector(token) for token in tokens]
        avg_vector = np.mean(vectors, axis=0)
        base_result = {
            "sentence": sentence,
            "embedding": avg_vector.tolist(),
            "tokens": tokens,
            "token_count": len(tokens)
        }
        return enhanced_embedding_response(base_result, sentence, "sentence")
    except Exception as e:
        return {"error": f"Sentence embedding failed: {str(e)}"}
def sentence_similarity(sentence1: str, sentence2: str) -> Dict[str, Any]:
    """Cosine similarity between the mean-vector embeddings of two sentences."""
    try:
        emb1 = sentence_embedding(sentence1)
        emb2 = sentence_embedding(sentence2)
        if "error" in emb1 or "error" in emb2:
            # Report the first error encountered (emb1's takes precedence).
            return {"error": emb1.get("error", emb2.get("error"))}
        v1 = np.array(emb1["embedding"])
        v2 = np.array(emb2["embedding"])
        denom = (np.linalg.norm(v1) * np.linalg.norm(v2))
        similarity = float(np.dot(v1, v2) / denom) if denom != 0 else 0.0
        base_result = {
            "sentence1": sentence1,
            "sentence2": sentence2,
            "similarity": round(similarity, 6)
        }
        return enhanced_embedding_response(base_result, f"{sentence1} vs {sentence2}", "sentence_similarity")
    except Exception as e:
        return {"error": f"Sentence similarity failed: {str(e)}"}


# -------------------------
# Document Search
# -------------------------
def parse_uploaded_documents(file):
    """Read an uploaded txt/csv file and split it into one document per line.

    Accepts a Gradio file object (uses `.name`) or a plain path. Falls back
    to latin-1 when the file is not valid UTF-8. Returns {"documents": [...]}
    or {"error": ...}.
    """
    if file is None:
        return {"error": "Please upload a file (txt/csv)."}
    try:
        if hasattr(file, 'name'):
            file_path = file.name
        else:
            file_path = str(file)
        with open(file_path, 'r', encoding='utf-8') as f:
            raw = f.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as f:
                raw = f.read()
        except Exception as e:
            return {"error": f"Encoding error: {str(e)}"}
    except Exception as e:
        return {"error": f"File reading error: {str(e)}"}

    # The original code special-cased comma-heavy (CSV-ish) content, but both
    # branches were identical — every non-blank line is one document.
    docs = [line.strip() for line in raw.splitlines() if line.strip()]

    if not docs:
        return {"error": "No documents found in the file"}
    return {"documents": docs}
def index_documents_for_search(docs: List[str]):
    """Build an average-of-word-vectors index over `docs`.

    Returns {"matrix", "matrix_norm", "docs"} (raw vectors, row-normalised
    vectors for cosine search, original texts) or {"error": ...}.
    """
    if not docs:
        return {"error": "The file was empty"}
    try:
        vecs = []
        for d in docs:
            tokens = [t for t in d.split() if t.strip()]
            if not tokens:
                # Token-less document -> zero vector keeps row alignment.
                vecs.append(np.zeros((model.get_dimension(),), dtype=np.float32))
                continue
            mats = np.vstack([model.get_word_vector(t) for t in tokens])
            vecs.append(mats.mean(axis=0))
        M = np.vstack(vecs).astype(np.float32)
        norms = np.linalg.norm(M, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # avoid division by zero for zero vectors
        M_norm = M / norms
        return {"matrix": M, "matrix_norm": M_norm, "docs": docs}
    except Exception as e:
        return {"error": f"Error while data indexing: {str(e)}"}


def search_documents(query: str, indexed):
    """Cosine-search `indexed` (from index_documents_for_search) for `query`.

    Returns the top 10 documents with scores, or {"error": ...}.
    """
    q = safe_strip(query)
    if not q:
        return {"error": "Enter a query to search"}
    try:
        q_tokens = [t for t in q.split() if t.strip()]
        if not q_tokens:
            return {"error": "Couldn't extract tokens from query"}
        q_vecs = np.vstack([model.get_word_vector(t) for t in q_tokens])
        q_avg = q_vecs.mean(axis=0)
        q_norm = np.linalg.norm(q_avg)
        if q_norm == 0:
            # Zero-norm query: cosine undefined, report zero similarity.
            sims = np.zeros(indexed["matrix_norm"].shape[0], dtype=np.float32)
        else:
            q_avg = (q_avg / q_norm).astype(np.float32)
            sims = np.dot(indexed["matrix_norm"], q_avg)
        idx = np.argsort(-sims)[:10]
        results = []
        for i in idx:
            results.append({"document": indexed["docs"][i], "score": float(round(sims[i], 6))})
        return {"query": q, "results": results}
    except Exception as e:
        return {"error": f"Search failed: {str(e)}"}


# -------------------------
# API Platform
# -------------------------
def create_api_platform():
    """Render the static API-documentation tab (markdown only, no endpoints)."""
    with gr.Column():
        # Quick Start Section
        gr.Markdown("## Quick start")
        gr.Markdown("Get started with the `Embedding_Siyabasa API` in minutes.")

        with gr.Tabs():
            with gr.TabItem("🐍 Python"):
                gr.Markdown("""
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="අම්මා",
    api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```
""")
                gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _\\*Required_ - The input value that is provided in the "Sinhala Word" Textbox component.

#### **Returns 1 element**
- `str | float | bool | list | dict` - The output value that appears in the "Embedding Vector" Json component.
""")

                # API Endpoints Section
                gr.Markdown("## API endpoints")

                # Word Embedding Endpoint
                with gr.Accordion("GET WORD EMBEDDING", open=True):
                    gr.Markdown("""
Get the embedding vector for a Sinhala word.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="අම්මා",
    api_name="/get_embedding"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "text": "අම්මා",
    "embedding": [0.123, -0.456, 0.789, ...],
    "dimensions": 300,
    "model": "UgannA_SiyabasaV2",
    "language": "Sinhala"
}
```
""")
                    gr.Markdown("""
#### **Accepts 1 parameter:**
- `word` : `string` _\\*Required_ - The input value that is provided in the "Sinhala Word" Textbox component.

#### **Returns 1 element**
- `str | float | bool | list | dict` - The output value that appears in the "Embedding Vector" Json component.
""")

                # Word Similarity Endpoint
                with gr.Accordion("GET WORD SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala words.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word1="අම්මා",
    word2="තාත්තා",
    api_name="/word_similarity"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "word1": "අම්මා",
    "word2": "තාත්තා",
    "similarity": 0.856234,
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word1` : `string` \\*_Required_ - The input value that is provided in the "Word 1" Textbox component.
2. `word2` : `string` \\*_Required_ - The input value that is provided in the "Word 2" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Similarity Result" Json component.
""")

                # Nearest Neighbors Endpoint
                with gr.Accordion("GET NEAREST NEIGHBORS", open=False):
                    gr.Markdown("""
Find semantically similar words for a given Sinhala word.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    word="පෞරාණික",
    top_k=5,
    api_name="/nearest_neighbors"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "query": "පෞරාණික",
    "neighbors": [
        {"word": "ඉපැරණි", "similarity": 0.755...},
        {"word": "පුරාවිද්යාත්මක", "similarity": 0.749...},
        ...
    ],
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### **Accepts 2 parameters:**
1. `word` : `str` \\*_Required_ - The input value that is provided in the "Query Word" Textbox component.
2. `top_k` : `float` _Default: 10_ - The input value that is provided in the "Number of Results" Slider component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Similar Words" Json component.""")

                # Sentence Embedding Endpoint
                with gr.Accordion("GET SENTENCE EMBEDDING", open=False):
                    gr.Markdown("""
Get the embedding vector for a Sinhala sentence.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    sentence="මම පාසලට යමි",
    api_name="/sentence_embedding"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "sentence": "මම පාසලට යමි",
    "embedding": [0.123, -0.456, 0.789, ...],
    "dimensions": 300,
    "tokens": ["මම", "පාසලට", "යමි"],
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
#### Accepts 1 parameter:
- `sentence` : `str` \\*_Required_ - The input value that is provided in the "Sinhala Sentence" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Sentence Embedding" Json component.
""")

                # Sentence Similarity Endpoint
                with gr.Accordion("GET SENTENCE SIMILARITY", open=False):
                    gr.Markdown("""
Compute the similarity between two Sinhala sentences.

**Python example:**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    sentence1="මම පාසලට යමි",
    sentence2="ඔහු පාසලට යයි",
    api_name="/sentence_similarity"
)
print(json.dumps(result, indent=4))
```

**Response format:**
```json
{
    "sentence1": "මම පාසලට යමි",
    "sentence2": "ඔහු පාසලට යයි",
    "similarity": 0.734567,
    "model": "UgannA_SiyabasaV2"
}
```
""")
                    gr.Markdown("""
**Accepts 2 parameters:**
1. `sentence1` : `str` \\*_Required_ - The input value that is provided in the "Sentence A" Textbox component.
2. `sentence2` : `str` \\*_Required_ - The input value that is provided in the "Sentence B" Textbox component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the "Sentence Similarity" Json component.
""")

                # Document Search Endpoints
                with gr.Accordion("DOCUMENT SEARCH", open=False):
                    gr.Markdown("""
Upload documents and perform semantic search.

**Step 1: Index documents**
```python
from gradio_client import Client, handle_file

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    file=handle_file('path/to/documents.txt'),
    api_name="/_index_upload"
)
print(json.dumps(result, indent=4))
```
""")
                    gr.Markdown("""
#### **Accepts 1 parameter:**
1. `file` : `filepath` \\*_Required_ The input value that is provided in the "Upload .txt or .csv File" File component. The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.

Attributes:
path: The server file path where the file is stored.
url: The normalized server URL pointing to the file.
size: The size of the file in bytes.
orig_name: The original filename before upload.
mime_type: The MIME type of the file.
is_stream: Indicates whether the file is a stream.
meta: Additional metadata used internally (should not be changed).

#### **Returns tuple of 2 elements**
1. `dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)` - The output value that appears in the `value_45` Dataframe component.
2. `str` - The output value that appears in the "Status" Textbox component.
""")
                    gr.Markdown("""
**Step 2: Search documents**
```python
from gradio_client import Client

client = Client("Remeinium/Embedding_Siyabasa")
result = client.predict(
    query="සිංහල භාෂාව",
    topn_=5,
    api_name="/_search_wrapper"
)
print(json.dumps(result, indent=4))
```
""")
                    gr.Markdown("""
### **Accepts 2 parameters:**
1. `query` : `string` \\*_Required_ - The input value that is provided in the `Search Query` Textbox component.
2. `topn_` : `float` _Default 5_ - The input value that is provided in the "Number of Results" Slider component.

#### **Returns 1 element**
`str | float | bool | list | dict` - The output value that appears in the `Search Results` Json component.
""")

            with gr.TabItem(" JavaScript"):
                gr.Markdown("""
```javascript
import { Client } from "@gradio/client";

const client = await Client.connect("Remeinium/Embedding_Siyabasa");
const result = await client.predict("/get_embedding", {
    word: "අම්මා"
});
console.log(result.data);
```
""")
                web_btn_js = gr.Button("Refer the Complete Javascript API Documentation", elem_id="button_green")
                # BUGFIX: this button previously opened cu_docs (the cURL
                # anchor); it must open the JavaScript documentation anchor.
                js_code = f"() => window.open('{js_docs}', '_blank')"
                web_btn_js.click(None, None, None, js=js_code)

            with gr.TabItem("␥ cURL"):
                gr.Markdown("""
```bash
curl -X POST https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding \\
    -H "Content-Type: application/json" \\
    -d '{"data": ["අම්මා"]}' | awk -F'"' '{ print $4}' | read EVENT_ID; \\
curl -N https://remeinium-embedding-siyabasa.hf.space/gradio_api/call/get_embedding/$EVENT_ID
```
""")
                web_btn_cu = gr.Button("Refer the Complete cURL API Documentation", elem_id="button_green")
                js_code = f"() => window.open('{cu_docs}', '_blank')"
                web_btn_cu.click(None, None, None, js=js_code)

        # Model Information
        # NOTE: table cells joined with <br> — raw newlines inside cells break
        # GitHub-flavored-markdown table rendering.
        gr.Markdown("## Model Details")
        gr.Markdown("""
| Property | Description |
|----------|-------------|
| **Model** | Embedding_Siyabasa API<br>`UgannA_SiyabasaV2` |
| **Supported data types** | Input: Text<br>Output: Text embeddings |
| **Token limits** | Input token limit: 1000<br>Output dimension size: 300 |
| **Version** | Model: V_2.0<br>API: V_1.0 |
| **Latest update** | August 2025 |
| **Language** | `Sinhala` only |
""")

        # Usage and Limits
        gr.Markdown("## Usage and limits")
        gr.Markdown("""
- **Always Free**: Unlimited requests (subject to fair usage)
- **Rate limits**: Applied only during high traffic to ensure service stability
""")

        # Support
        gr.Markdown("## Support")
        gr.Markdown("""
- **Read Official Documentation.**
- **Technical support**: support@remeinium.com
- **Bug reports**: Create an issue in the Space discussions
- **Feature requests**: Contact support@remeinium.com

> **Note**: This API is designed specifically for **Sinhala** language processing and **may not work with other languages.**
""")

        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button_green")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)
# -------------------------
# Main Application
# -------------------------
# Top-level Blocks UI: welcome header, playground tab (wired to the embedding
# functions above), static API-docs tab, and an uptime-status tab.
with gr.Blocks(title="Sinhala Embeddings API", css=styles) as demo:
    gr.Markdown("""
# 🇱🇰 Embedding_Siyabasa - Sinhala | An Advanced Embeddings API for Sinhala Language
## Welcome to the official HuggingFace Space for _Embedding Siyabasa_

The `Embedding_Siyabasa API` provides high-quality text embedding models specifically designed for the `Sinhala` language. Generate embeddings for Sinhala words, phrases, and sentences using our latest model `UgannA_SiyabasaV2`. These language-specific embeddings power advanced **NLP tasks such as semantic search, text classification, and document clustering**, delivering more accurate and context-aware results than traditional keyword-based approaches.

Get the Model (`UgannA_SiyabasaV2`): https://huggingface.co/Remeinium/UgannA_SiyabasaV2

**Key features:**
- **Language-specific**: Optimized exclusively for Sinhala text
- **300-dimensional embeddings**: Rich semantic representations
- **FastText architecture**: Proven performance for morphologically rich languages
""")

    # External-link buttons: click handlers run pure client-side JS (no
    # backend fn), opening the docs / website in a new tab.
    with gr.Row():
        web_btn = gr.Button("Refer the Complete API Documentation", elem_id="button_green")
        js_code = f"() => window.open('{docs}', '_blank')"
        web_btn.click(None, None, None, js=js_code)
        web_btn_site = gr.Button("Visit Remeinium AI", elem_id="button")
        js_code = f"() => window.open('{website}', '_blank')"
        web_btn_site.click(None, None, None, js=js_code)

    with gr.Tabs():
        # Playground
        with gr.TabItem("🧩 Embedding Playground"):
            gr.Markdown("## Explore Model Capabilities")
            gr.Markdown("Test the model directly without API access requirements.")

            # Word Embedding
            with gr.Row():
                inp = gr.Textbox(label="Sinhala Word", placeholder="අම්මා, සියබස, නූතන")
                out = gr.JSON(label="Embedding Vector")
            # cache_examples=True runs get_embedding for each example at
            # startup, so cached results require the model to be loaded.
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=inp,
                outputs=out,
                fn=get_embedding,
                cache_examples=True
            )
            btn = gr.Button("Get Embedding", elem_id="button")
            btn.click(fn=get_embedding, inputs=inp, outputs=out)

            # Word Similarity
            gr.Markdown("### Word Similarity")
            with gr.Row():
                ws_a = gr.Textbox(label="Word A", placeholder="අම්මා")
                ws_b = gr.Textbox(label="Word B", placeholder="තාත්තා")
            ws_out = gr.JSON(label="Similarity Result")
            ws_btn = gr.Button("Compare Words", elem_id="button")
            ws_btn.click(fn=word_similarity, inputs=[ws_a, ws_b], outputs=ws_out)

            # Nearest Neighbors
            gr.Markdown("### Semantic Search")
            with gr.Row():
                nn_word = gr.Textbox(label="Query Word", placeholder="පෞරාණික")
                nn_k = gr.Slider(1, 50, 10, label="Number of Results")
            nn_out = gr.JSON(label="Similar Words")
            # Examples pass only the word; nearest_neighbors falls back to its
            # default top_k for cached example runs.
            gr.Examples(
                examples=[["අම්මා"], ["සියබස"], ["නූතන"], ["ප්‍රජාතන්ත්‍රවාදය"]],
                inputs=nn_word,
                outputs=nn_out,
                fn=nearest_neighbors,
                cache_examples=True
            )
            nn_btn = gr.Button("Find Similar Words", elem_id="button")
            nn_btn.click(fn=nearest_neighbors, inputs=[nn_word, nn_k], outputs=nn_out)

            # Sentence Operations
            gr.Markdown("### Sentence Operations")
            with gr.Row():
                sent_inp = gr.Textbox(label="Sinhala Sentence", placeholder="මම පාසලට යමි")
                sent_out = gr.JSON(label="Sentence Embedding")
            gr.Examples(
                examples=[["මම පාසලට යමි"], ["ආරෝග්‍යා පරමා ලාභා"], ["ඔබට බොහොම ස්තුතියි."]],
                inputs=sent_inp,
                outputs=sent_out,
                fn=sentence_embedding,
                cache_examples=True
            )
            sent_btn = gr.Button("Get Sentence Embedding", elem_id="button")
            sent_btn.click(fn=sentence_embedding, inputs=sent_inp, outputs=sent_out)

            with gr.Row():
                sa = gr.Textbox(label="Sentence A", placeholder="මම පාසලට යමි")
                sb = gr.Textbox(label="Sentence B", placeholder="ඔහු පාසලට යයි")
            ssim_out = gr.JSON(label="Sentence Similarity")
            ssim_btn = gr.Button("Compare Sentences", elem_id="button")
            ssim_btn.click(fn=sentence_similarity, inputs=[sa, sb], outputs=ssim_out)

            # Document Search
            gr.Markdown("### Document Semantic Search")
            gr.Markdown("Upload a text file (one document per line) for semantic search.")
            status_display = gr.Textbox(label="Status", value="Ready to upload documents", interactive=False)
            with gr.Row():
                upload = gr.File(label="Upload .txt or .csv File", file_count="single")
                docs_list = gr.Dataframe(headers=["Document Preview"], interactive=False)
            idx_btn = gr.Button("Index Documents", elem_id="button")
            # Per-session holder for the indexed document matrix; None until
            # a file has been indexed.
            indexed_state = gr.State(value=None)

            def _index_upload(file):
                # Parse + index the uploaded file; returns (state, preview
                # rows for the Dataframe, status message).
                if file is None:
                    return None, gr.update(value=[]), "Please upload a file first"
                parsed = parse_uploaded_documents(file)
                if "error" in parsed:
                    return None, gr.update(value=[]), parsed["error"]
                docs = parsed["documents"]
                indexed = index_documents_for_search(docs)
                if "error" in indexed:
                    return None, gr.update(value=[]), indexed["error"]
                # Preview first 20 docs, truncated to 200 chars each.
                preview = [[(d[:200] + "..." if len(d) > 200 else d)] for d in docs[:20]]
                return indexed, gr.update(value=preview), f"Indexed {len(docs)} documents"

            idx_btn.click(_index_upload, inputs=[upload], outputs=[indexed_state, docs_list, status_display])

            with gr.Row():
                q = gr.Textbox(label="Search Query")
                topn = gr.Slider(1, 20, 5, label="Number of Results")
            results_out = gr.JSON(label="Search Results")

            def _search_wrapper(query, topn_, state):
                # Guard: search is only valid after _index_upload has run.
                if state is None:
                    return {"error": "Please index documents first"}
                res = search_documents(query, state)
                if "results" in res:
                    # search_documents returns up to 10; trim to the slider value.
                    res["results"] = res["results"][:int(topn_)]
                return res

            search_btn = gr.Button("Search Documents", elem_id="button")
            search_btn.click(fn=_search_wrapper, inputs=[q, topn, indexed_state], outputs=[results_out])

        # API Platform Tab
        with gr.TabItem("⚡ API Platform"):
            create_api_platform()

        with gr.TabItem("💡 Status"):
            # gr.Markdown("Check at : https://stats.uptimerobot.com/HZFBOsSvBT")
            web_btn_status = gr.Button("Check Status", elem_id="button")
            js_code = f"() => window.open('{status}', '_blank')"
            web_btn_status.click(None, None, None, js=js_code)

    gr.Markdown("""
---
*✨ **Remeinium AI** · _Intelligence for a greater tomorrow._*
""")

if __name__ == "__main__":
    # demo.queue(default_concurrency_limit=10, max_size=20).launch()
    demo.launch()