# Hugging Face Space: spartan8806/atles-echo (status at capture: Sleeping)
# File size: 6,817 bytes
# Revision: 73baae2

"""

ATLES-ECHO - Semantic Embedding Service

A Hugging Face Space for generating embeddings using the ATLES Champion model.

"""

import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the ATLES Champion embedding model once, at import time. The handler
# functions in this file (generate_embedding, compare_texts, batch_embed) all
# read this module-level ``model``.
# NOTE(review): presumably the weights are fetched from the Hugging Face Hub on
# first start — confirm the runtime has network access or a warm cache.
print("Loading ATLES Champion Embedding model...")
model = SentenceTransformer("spartan8806/atles-champion-embedding")
# Log the vector size reported by the loaded model itself.
print(f"Model loaded! Dimension: {model.get_sentence_embedding_dimension()}")

def generate_embedding(text: str) -> dict:
    """Embed one piece of text with the module-level ``model``.

    Args:
        text: The text to embed.

    Returns:
        For falsy or whitespace-only input, a dict with an ``error`` message
        and ``embedding`` / ``dimension`` set to ``None``.  Otherwise a dict
        with ``text_preview`` (the text, cut to 100 characters plus ``...``
        when longer), ``dimension``, ``embedding_preview`` (first 10 values)
        and ``embedding_full`` (all values as a list).
    """
    # Reject missing or blank input before touching the model.
    if not (text and text.strip()):
        return {"error": "Please enter some text", "embedding": None, "dimension": None}

    # The model is asked for normalized vectors.
    vector = model.encode(text, normalize_embeddings=True)

    # Keep the echoed text short so the JSON result stays readable.
    if len(text) > 100:
        preview = text[:100] + "..."
    else:
        preview = text

    head = vector[:10].tolist()  # First 10 values
    everything = vector.tolist()

    return {
        "text_preview": preview,
        "dimension": len(vector),
        "embedding_preview": head,
        "embedding_full": everything,
    }

def compare_texts(text1: str, text2: str) -> dict:
    """Compare the semantic similarity of two texts.

    Both texts are encoded together by the module-level ``model`` with
    normalized embeddings, and their dot product is reported as the
    similarity score.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        If either text is falsy (including ``None``) or whitespace-only, a
        dict with an ``error`` message and ``similarity`` set to ``None``.
        Otherwise a dict with ``text1_preview`` / ``text2_preview`` (each cut
        to 50 characters plus ``...`` when longer), ``similarity`` (rounded to
        4 decimals), ``similarity_percent`` (one decimal, with ``%``) and
        ``interpretation`` (label from ``get_similarity_interpretation``).
    """
    # Check for falsy values first so a missing input (e.g. None) yields the
    # error dict instead of an AttributeError on .strip(), matching the guard
    # used by generate_embedding.
    if not text1 or not text2 or not text1.strip() or not text2.strip():
        return {"error": "Please enter both texts", "similarity": None}

    # Generate embeddings for both texts in a single call
    embeddings = model.encode([text1, text2], normalize_embeddings=True)

    # The vectors were requested normalized, so the dot product is used as
    # the cosine similarity.
    similarity = float(np.dot(embeddings[0], embeddings[1]))

    return {
        "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1,
        "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2,
        "similarity": round(similarity, 4),
        "similarity_percent": f"{similarity * 100:.1f}%",
        "interpretation": get_similarity_interpretation(similarity)
    }

def get_similarity_interpretation(score: float) -> str:
    """Translate a similarity score into a short human-readable label.

    Args:
        score: Similarity value to classify.

    Returns:
        The label of the highest band whose lower bound the score reaches
        (0.9, 0.7, 0.5, 0.3), or the "different topics" label when the score
        is below every bound.
    """
    # Bands are listed from highest lower bound to lowest; first match wins.
    bands = (
        (0.9, "🟢 Nearly identical meaning"),
        (0.7, "🟡 Very similar"),
        (0.5, "🟠 Somewhat related"),
        (0.3, "🔴 Loosely related"),
    )
    for lower_bound, label in bands:
        if score >= lower_bound:
            return label
    return "⚫ Different topics"

def batch_embed(texts: str) -> dict:
    """Generate embeddings for multiple texts (one per line).

    The input is split on newlines; each line is stripped and blank lines are
    dropped.  The remaining lines are encoded in one call by the module-level
    ``model`` with normalized embeddings.

    Args:
        texts: Newline-separated texts.  A falsy value (including ``None``)
            is treated as empty input.

    Returns:
        If no non-blank line is present, or more than 10 are, a dict with an
        ``error`` message and ``embeddings`` set to ``None``.  Otherwise a
        dict with ``count``, ``dimension`` and ``results``, where each result
        holds a 1-based ``index``, the ``text`` (cut to 50 characters plus
        ``...`` when longer) and ``embedding_preview`` (first 5 values).
    """
    # ``texts or ""`` lets a missing input fall through to the normal
    # "enter at least one text" error instead of raising on .split().
    stripped = (raw.strip() for raw in (texts or "").split('\n'))
    lines = [line for line in stripped if line]

    if not lines:
        return {"error": "Please enter at least one text (one per line)", "embeddings": None}

    if len(lines) > 10:
        return {"error": "Maximum 10 texts at a time", "embeddings": None}

    # Generate embeddings for all lines in a single call
    embeddings = model.encode(lines, normalize_embeddings=True)

    # Only a short preview of each text and vector is returned per entry.
    results = [
        {
            "index": index,
            "text": text[:50] + "..." if len(text) > 50 else text,
            "embedding_preview": vector[:5].tolist(),
        }
        for index, (text, vector) in enumerate(zip(lines, embeddings), start=1)
    ]

    return {
        "count": len(lines),
        "dimension": len(embeddings[0]),
        "results": results
    }

# Create Gradio interface
# Layout: an intro Markdown header, three tabs (single embedding, pairwise
# similarity, batch embedding), and a closing "About" Markdown footer.
# Each tab wires one button to one of the handler functions defined above and
# shows the returned dict in a JSON component.
with gr.Blocks(
    title="ATLES-ECHO Embedding Service",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
) as demo:
    
    # NOTE(review): the "768" dimension below is hard-coded text; it is not
    # derived from model.get_sentence_embedding_dimension() — keep in sync.
    gr.Markdown("""

    # 🧠 ATLES-ECHO Embedding Service

    

    Generate high-quality semantic embeddings using the **ATLES Champion** model.

    

    - **Model**: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding)

    - **Dimension**: 768

    - **Top-10 MTEB Performance**: Pearson 0.8445, Spearman 0.8374

    """)
    
    with gr.Tabs():
        # Tab 1: Single Embedding
        with gr.TabItem("🔤 Single Embedding"):
            gr.Markdown("Generate an embedding for a single piece of text.")
            
            # Left column: input and button; right column: JSON result.
            with gr.Row():
                with gr.Column():
                    single_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text to embed...",
                        lines=3
                    )
                    single_btn = gr.Button("Generate Embedding", variant="primary")
                
                with gr.Column():
                    single_output = gr.JSON(label="Embedding Result")
            
            # Button click -> generate_embedding(textbox value) -> JSON output.
            single_btn.click(
                fn=generate_embedding,
                inputs=single_input,
                outputs=single_output
            )
        
        # Tab 2: Compare Texts
        with gr.TabItem("⚖️ Compare Similarity"):
            gr.Markdown("Compare the semantic similarity between two texts.")
            
            # The two text inputs sit side by side in one row.
            with gr.Row():
                text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2)
                text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2)
            
            compare_btn = gr.Button("Compare Similarity", variant="primary")
            compare_output = gr.JSON(label="Similarity Result")
            
            # Button click -> compare_texts(text 1, text 2) -> JSON output.
            compare_btn.click(
                fn=compare_texts,
                inputs=[text1_input, text2_input],
                outputs=compare_output
            )
        
        # Tab 3: Batch Embedding
        with gr.TabItem("📦 Batch Embed"):
            gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).")
            
            # Left column: multi-line input and button; right column: JSON result.
            with gr.Row():
                with gr.Column():
                    batch_input = gr.Textbox(
                        label="Texts (one per line)",
                        placeholder="Text 1\nText 2\nText 3...",
                        lines=6
                    )
                    batch_btn = gr.Button("Generate Batch Embeddings", variant="primary")
                
                with gr.Column():
                    batch_output = gr.JSON(label="Batch Results")
            
            # Button click -> batch_embed(textbox value) -> JSON output.
            batch_btn.click(
                fn=batch_embed,
                inputs=batch_input,
                outputs=batch_output
            )
    
    # Static footer describing the project, with links to the model card and GitHub.
    gr.Markdown("""

    ---

    ### About ATLES-ECHO

    

    ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life.

    

    **Features:**

    - 🧠 High-quality semantic embeddings (768 dimensions)

    - ⚡ Fast inference with normalized vectors

    - 🎯 Top-10 MTEB benchmark performance

    - 🔒 Built for the ATLES privacy-first ecosystem

    

    [View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806)

    """)

# Launch the app
# Guarded so that demo.launch() runs only when this file is executed as a
# script, not when the module is imported.
if __name__ == "__main__":
    demo.launch()