""" ATLES-ECHO - Semantic Embedding Service A Hugging Face Space for generating embeddings using the ATLES Champion model. """ import gradio as gr from sentence_transformers import SentenceTransformer import numpy as np # Load the ATLES Champion embedding model print("Loading ATLES Champion Embedding model...") model = SentenceTransformer("spartan8806/atles-champion-embedding") print(f"Model loaded! Dimension: {model.get_sentence_embedding_dimension()}") def generate_embedding(text: str) -> dict: """Generate embedding for input text""" if not text or not text.strip(): return {"error": "Please enter some text", "embedding": None, "dimension": None} # Generate embedding embedding = model.encode(text, normalize_embeddings=True) return { "text_preview": text[:100] + "..." if len(text) > 100 else text, "dimension": len(embedding), "embedding_preview": embedding[:10].tolist(), # First 10 values "embedding_full": embedding.tolist() } def compare_texts(text1: str, text2: str) -> dict: """Compare similarity between two texts""" if not text1.strip() or not text2.strip(): return {"error": "Please enter both texts", "similarity": None} # Generate embeddings embeddings = model.encode([text1, text2], normalize_embeddings=True) # Calculate cosine similarity similarity = float(np.dot(embeddings[0], embeddings[1])) return { "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1, "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2, "similarity": round(similarity, 4), "similarity_percent": f"{similarity * 100:.1f}%", "interpretation": get_similarity_interpretation(similarity) } def get_similarity_interpretation(score: float) -> str: """Interpret similarity score""" if score >= 0.9: return "🟢 Nearly identical meaning" elif score >= 0.7: return "🟡 Very similar" elif score >= 0.5: return "🟠 Somewhat related" elif score >= 0.3: return "🔴 Loosely related" else: return "⚫ Different topics" def batch_embed(texts: str) -> dict: """Generate embeddings for multiple texts (one per line)""" lines = [l.strip() for l in texts.split('\n') if l.strip()] if not lines: return {"error": "Please enter at least one text (one per line)", "embeddings": None} if len(lines) > 10: return {"error": "Maximum 10 texts at a time", "embeddings": None} # Generate embeddings embeddings = model.encode(lines, normalize_embeddings=True) results = [] for i, (text, emb) in enumerate(zip(lines, embeddings)): results.append({ "index": i + 1, "text": text[:50] + "..." if len(text) > 50 else text, "embedding_preview": emb[:5].tolist() }) return { "count": len(lines), "dimension": len(embeddings[0]), "results": results } # Create Gradio interface with gr.Blocks( title="ATLES-ECHO Embedding Service", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan") ) as demo: gr.Markdown(""" # 🧠 ATLES-ECHO Embedding Service Generate high-quality semantic embeddings using the **ATLES Champion** model. - **Model**: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding) - **Dimension**: 768 - **Top-10 MTEB Performance**: Pearson 0.8445, Spearman 0.8374 """) with gr.Tabs(): # Tab 1: Single Embedding with gr.TabItem("🔤 Single Embedding"): gr.Markdown("Generate an embedding for a single piece of text.") with gr.Row(): with gr.Column(): single_input = gr.Textbox( label="Input Text", placeholder="Enter text to embed...", lines=3 ) single_btn = gr.Button("Generate Embedding", variant="primary") with gr.Column(): single_output = gr.JSON(label="Embedding Result") single_btn.click( fn=generate_embedding, inputs=single_input, outputs=single_output ) # Tab 2: Compare Texts with gr.TabItem("⚖️ Compare Similarity"): gr.Markdown("Compare the semantic similarity between two texts.") with gr.Row(): text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2) text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2) compare_btn = gr.Button("Compare Similarity", variant="primary") compare_output = gr.JSON(label="Similarity Result") compare_btn.click( fn=compare_texts, inputs=[text1_input, text2_input], outputs=compare_output ) # Tab 3: Batch Embedding with gr.TabItem("📦 Batch Embed"): gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).") with gr.Row(): with gr.Column(): batch_input = gr.Textbox( label="Texts (one per line)", placeholder="Text 1\nText 2\nText 3...", lines=6 ) batch_btn = gr.Button("Generate Batch Embeddings", variant="primary") with gr.Column(): batch_output = gr.JSON(label="Batch Results") batch_btn.click( fn=batch_embed, inputs=batch_input, outputs=batch_output ) gr.Markdown(""" --- ### About ATLES-ECHO ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life. **Features:** - 🧠 High-quality semantic embeddings (768 dimensions) - ⚡ Fast inference with normalized vectors - 🎯 Top-10 MTEB benchmark performance - 🔒 Built for the ATLES privacy-first ecosystem [View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806) """) # Launch the app if __name__ == "__main__": demo.launch()