File size: 6,817 Bytes
73baae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""

ATLES-ECHO - Semantic Embedding Service

A Hugging Face Space for generating embeddings using the ATLES Champion model.

"""

import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the ATLES Champion embedding model
# This runs at import time; the module-level `model` created here is the
# encoder used by generate_embedding, compare_texts and batch_embed below.
print("Loading ATLES Champion Embedding model...")
model = SentenceTransformer("spartan8806/atles-champion-embedding")
# Print the embedding dimension reported by the loaded model.
print(f"Model loaded! Dimension: {model.get_sentence_embedding_dimension()}")

def generate_embedding(text: str) -> dict:
    """Encode a single text and describe the resulting vector.

    Args:
        text: The text to embed.

    Returns:
        For ``None``, empty or whitespace-only input, a dict with an
        ``error`` message and ``embedding`` / ``dimension`` set to ``None``.
        Otherwise a dict with ``text_preview`` (input cut to 100 characters
        plus ``"..."`` when longer), ``dimension`` (vector length),
        ``embedding_preview`` (first 10 values) and ``embedding_full``
        (all values as a list).
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return {"error": "Please enter some text", "embedding": None, "dimension": None}

    # Shorten long inputs so the JSON result stays readable.
    if len(text) > 100:
        preview = text[:100] + "..."
    else:
        preview = text

    # Ask the module-level model for a normalized embedding of the text.
    vector = model.encode(text, normalize_embeddings=True)

    return {
        "text_preview": preview,
        "dimension": len(vector),
        "embedding_preview": vector[:10].tolist(),
        "embedding_full": vector.tolist(),
    }

def compare_texts(text1: str, text2: str) -> dict:
    """Compare the semantic similarity of two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        If either text is ``None``, empty or whitespace-only, a dict with an
        ``error`` message and ``similarity`` set to ``None``. Otherwise a dict
        with ``text1_preview`` / ``text2_preview`` (inputs cut to 50
        characters plus ``"..."`` when longer), ``similarity`` (rounded to
        4 decimals), ``similarity_percent`` (formatted with one decimal) and
        ``interpretation`` (label from ``get_similarity_interpretation``).
    """
    # Check falsiness before calling .strip() so a missing (None) value is
    # reported as an input error instead of raising AttributeError, matching
    # the guard used by generate_embedding.
    if not text1 or not text2 or not text1.strip() or not text2.strip():
        return {"error": "Please enter both texts", "similarity": None}

    # Encode both texts in one call, requesting normalized embeddings.
    first_vec, second_vec = model.encode([text1, text2], normalize_embeddings=True)

    # With normalized embeddings the dot product is used as the cosine similarity.
    similarity = float(np.dot(first_vec, second_vec))

    return {
        "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1,
        "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2,
        "similarity": round(similarity, 4),
        "similarity_percent": f"{similarity * 100:.1f}%",
        "interpretation": get_similarity_interpretation(similarity),
    }

def get_similarity_interpretation(score: float) -> str:
    """Map a similarity score to a human-readable label.

    Thresholds are checked from highest to lowest, so each label covers the
    range from its threshold up to the next higher one.

    Args:
        score: Similarity score to interpret.

    Returns:
        A label prefixed with a colored-circle emoji: ``>= 0.9`` nearly
        identical, ``>= 0.7`` very similar, ``>= 0.5`` somewhat related,
        ``>= 0.3`` loosely related, anything else different topics.
    """
    # The emoji prefixes were previously stored as mis-decoded UTF-8
    # (mojibake); they are written here as the intended characters.
    if score >= 0.9:
        return "🟢 Nearly identical meaning"
    if score >= 0.7:
        return "🟡 Very similar"
    if score >= 0.5:
        return "🟠 Somewhat related"
    if score >= 0.3:
        return "🔴 Loosely related"
    return "⚫ Different topics"

def batch_embed(texts: str) -> dict:
    """Generate embeddings for multiple texts (one per line).

    Args:
        texts: Newline-separated texts. Each line is stripped and lines that
            are empty after stripping are ignored. ``None`` is treated like
            empty input.

    Returns:
        If no non-empty line remains, or more than 10 lines are given, a dict
        with an ``error`` message and ``embeddings`` set to ``None``.
        Otherwise a dict with ``count`` (number of texts), ``dimension``
        (length of the first embedding) and ``results``: one entry per text
        with a 1-based ``index``, ``text`` (cut to 50 characters plus
        ``"..."`` when longer) and ``embedding_preview`` (first 5 values).
    """
    # Fall back to "" so a missing (None) value yields the regular error
    # response instead of raising AttributeError on .split().
    stripped = (line.strip() for line in (texts or "").split("\n"))
    lines = [line for line in stripped if line]

    if not lines:
        return {"error": "Please enter at least one text (one per line)", "embeddings": None}

    if len(lines) > 10:
        return {"error": "Maximum 10 texts at a time", "embeddings": None}

    # Encode all lines in a single call, requesting normalized embeddings.
    embeddings = model.encode(lines, normalize_embeddings=True)

    results = [
        {
            "index": position,
            "text": text[:50] + "..." if len(text) > 50 else text,
            "embedding_preview": vector[:5].tolist(),
        }
        for position, (text, vector) in enumerate(zip(lines, embeddings), start=1)
    ]

    return {
        "count": len(lines),
        "dimension": len(embeddings[0]),
        "results": results,
    }

# Create Gradio interface
# Builds the module-level `demo` app: a header, three tabs (single embedding,
# similarity comparison, batch embedding) each wiring a button to one of the
# functions above, and a footer. Emoji in the labels and Markdown are written
# as the intended characters (they were previously mis-decoded UTF-8).
with gr.Blocks(
    title="ATLES-ECHO Embedding Service",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
) as demo:
    
    gr.Markdown("""

    # 🧠 ATLES-ECHO Embedding Service

    

    Generate high-quality semantic embeddings using the **ATLES Champion** model.

    

    - **Model**: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding)

    - **Dimension**: 768

    - **Top-10 MTEB Performance**: Pearson 0.8445, Spearman 0.8374

    """)
    
    with gr.Tabs():
        # Tab 1: Single Embedding
        with gr.TabItem("🔤 Single Embedding"):
            gr.Markdown("Generate an embedding for a single piece of text.")
            
            with gr.Row():
                with gr.Column():
                    single_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text to embed...",
                        lines=3
                    )
                    single_btn = gr.Button("Generate Embedding", variant="primary")
                
                with gr.Column():
                    single_output = gr.JSON(label="Embedding Result")
            
            # Clicking the button sends the textbox value to generate_embedding
            # and shows the returned dict in the JSON component.
            single_btn.click(
                fn=generate_embedding,
                inputs=single_input,
                outputs=single_output
            )
        
        # Tab 2: Compare Texts
        with gr.TabItem("⚖️ Compare Similarity"):
            gr.Markdown("Compare the semantic similarity between two texts.")
            
            with gr.Row():
                text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2)
                text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2)
            
            compare_btn = gr.Button("Compare Similarity", variant="primary")
            compare_output = gr.JSON(label="Similarity Result")
            
            # Both textboxes are passed, in order, as text1 and text2.
            compare_btn.click(
                fn=compare_texts,
                inputs=[text1_input, text2_input],
                outputs=compare_output
            )
        
        # Tab 3: Batch Embedding
        with gr.TabItem("📦 Batch Embed"):
            gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).")
            
            with gr.Row():
                with gr.Column():
                    batch_input = gr.Textbox(
                        label="Texts (one per line)",
                        placeholder="Text 1\nText 2\nText 3...",
                        lines=6
                    )
                    batch_btn = gr.Button("Generate Batch Embeddings", variant="primary")
                
                with gr.Column():
                    batch_output = gr.JSON(label="Batch Results")
            
            # The whole multi-line textbox value is handed to batch_embed,
            # which splits it into individual texts.
            batch_btn.click(
                fn=batch_embed,
                inputs=batch_input,
                outputs=batch_output
            )
    
    gr.Markdown("""

    ---

    ### About ATLES-ECHO

    

    ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life.

    

    **Features:**

    - 🧠 High-quality semantic embeddings (768 dimensions)

    - ⚡ Fast inference with normalized vectors

    - 🎯 Top-10 MTEB benchmark performance

    - 🔒 Built for the ATLES privacy-first ecosystem

    

    [View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806)

    """)

# Launch the app
# Only call demo.launch() when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()