Spaces:
Sleeping
Sleeping
| """ | |
| ATLES-ECHO - Semantic Embedding Service | |
| A Hugging Face Space for generating embeddings using the ATLES Champion model. | |
| """ | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| import numpy as np | |
# Instantiate the ATLES Champion embedding model once, at import time, so
# every handler below shares the same `model` object. Progress is printed
# before and after because the constructor call happens between the two.
print("Loading ATLES Champion Embedding model...")
model = SentenceTransformer("spartan8806/atles-champion-embedding")
print(f"Model loaded! Dimension: {model.get_sentence_embedding_dimension()}")
def generate_embedding(text: str) -> dict:
    """Encode a single text and return the vector with a short summary.

    Args:
        text: The text to embed.

    Returns:
        On empty/blank (or None) input, a dict with an ``"error"`` message
        and ``None`` for ``"embedding"`` and ``"dimension"``. Otherwise a
        dict with ``"text_preview"`` (input cut to 100 characters plus
        ``"..."`` when longer), ``"dimension"`` (length of the vector),
        ``"embedding_preview"`` (first 10 values) and ``"embedding_full"``
        (every value as a plain list).
    """
    # Reject missing or whitespace-only input before touching the model.
    if not (text and text.strip()):
        return {"error": "Please enter some text", "embedding": None, "dimension": None}

    vector = model.encode(text, normalize_embeddings=True)

    # Keep the echoed text short; the full input is not needed in the result.
    if len(text) > 100:
        preview = text[:100] + "..."
    else:
        preview = text

    return {
        "text_preview": preview,
        "dimension": len(vector),
        "embedding_preview": vector[:10].tolist(),  # first 10 values only
        "embedding_full": vector.tolist(),
    }
def compare_texts(text1: str, text2: str) -> dict:
    """Compare the semantic similarity of two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        If either text is None, empty, or whitespace-only, a dict with an
        ``"error"`` message and ``"similarity": None``. Otherwise a dict
        with truncated previews of both texts (50 characters plus ``"..."``
        when longer), ``"similarity"`` rounded to 4 decimals,
        ``"similarity_percent"`` formatted with one decimal, and a
        human-readable ``"interpretation"``.
    """
    # Check for None/empty before calling .strip(): a missing value would
    # otherwise raise AttributeError instead of yielding the error payload.
    # This mirrors the guard used by generate_embedding.
    if not text1 or not text2 or not text1.strip() or not text2.strip():
        return {"error": "Please enter both texts", "similarity": None}

    # Encode both texts in one call.
    embeddings = model.encode([text1, text2], normalize_embeddings=True)

    # The vectors are requested normalized, so their dot product is used
    # directly as the cosine similarity.
    similarity = float(np.dot(embeddings[0], embeddings[1]))

    return {
        "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1,
        "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2,
        "similarity": round(similarity, 4),
        "similarity_percent": f"{similarity * 100:.1f}%",
        "interpretation": get_similarity_interpretation(similarity),
    }
def get_similarity_interpretation(score: float) -> str:
    """Map a similarity score to a short, emoji-prefixed description.

    Args:
        score: Similarity value to interpret.

    Returns:
        The label of the first band whose lower bound ``score`` reaches
        (0.9, 0.7, 0.5, 0.3), or the "Different topics" label when it
        reaches none of them.
    """
    # The coloured-circle emoji were mis-decoded in the labels (they showed
    # up as Greek letters and stray punctuation); they are restored here.
    # Bands are listed from highest to lowest lower bound; the first match wins.
    bands = (
        (0.9, "🟢 Nearly identical meaning"),
        (0.7, "🟡 Very similar"),
        (0.5, "🟠 Somewhat related"),
        (0.3, "🔴 Loosely related"),
    )
    for lower_bound, label in bands:
        if score >= lower_bound:
            return label
    return "⚫ Different topics"
def batch_embed(texts: str) -> dict:
    """Generate embeddings for several texts supplied one per line.

    Args:
        texts: Newline-separated texts. Blank lines are ignored and each
            remaining line is stripped of surrounding whitespace.

    Returns:
        An error dict (``"error"`` message, ``"embeddings": None``) when no
        non-blank line is present (including ``None`` input) or when more
        than 10 lines are given. Otherwise a dict with ``"count"``,
        ``"dimension"`` (length of the first vector) and ``"results"``: one
        entry per line holding its 1-based ``"index"``, the text cut to 50
        characters (plus ``"..."`` when longer) and the first 5 embedding
        values.
    """
    # `texts or ""` lets a missing value fall through to the "no input"
    # error instead of raising AttributeError on None.split.
    stripped = (raw.strip() for raw in (texts or "").split('\n'))
    lines = [line for line in stripped if line]

    if not lines:
        return {"error": "Please enter at least one text (one per line)", "embeddings": None}
    if len(lines) > 10:
        return {"error": "Maximum 10 texts at a time", "embeddings": None}

    # Encode all lines in a single call.
    embeddings = model.encode(lines, normalize_embeddings=True)

    results = [
        {
            "index": position,
            "text": text[:50] + "..." if len(text) > 50 else text,
            "embedding_preview": emb[:5].tolist(),
        }
        for position, (text, emb) in enumerate(zip(lines, embeddings), start=1)
    ]

    return {
        "count": len(lines),
        "dimension": len(embeddings[0]),
        "results": results,
    }
# Build the Gradio UI: an intro banner, three tabs (single embedding, pairwise
# similarity, batch embedding) each wired to its handler above, and a footer.
#
# NOTE(review): the emoji in the headings, tab labels and feature list were
# mis-decoded (rendered as Greek letters/punctuation) and are restored here.
# The emoji on the "privacy-first" bullet could not be recovered with
# certainty; a lock is assumed - confirm against the deployed Space.
# Blank lines inside the Markdown blocks are restored so that paragraphs,
# lists and the footer links render as separate elements.
# NOTE(review): the Markdown text is indented to match the code; this relies
# on gr.Markdown dedenting its value - confirm for the pinned Gradio version.
with gr.Blocks(
    title="ATLES-ECHO Embedding Service",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
) as demo:
    gr.Markdown("""
    # 🧠 ATLES-ECHO Embedding Service

    Generate high-quality semantic embeddings using the **ATLES Champion** model.

    - **Model**: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding)
    - **Dimension**: 768
    - **Top-10 MTEB Performance**: Pearson 0.8445, Spearman 0.8374
    """)

    with gr.Tabs():
        # Tab 1: Single Embedding
        with gr.TabItem("🔤 Single Embedding"):
            gr.Markdown("Generate an embedding for a single piece of text.")
            with gr.Row():
                with gr.Column():
                    single_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text to embed...",
                        lines=3
                    )
                    single_btn = gr.Button("Generate Embedding", variant="primary")
                with gr.Column():
                    single_output = gr.JSON(label="Embedding Result")
            single_btn.click(
                fn=generate_embedding,
                inputs=single_input,
                outputs=single_output
            )

        # Tab 2: Compare Texts
        with gr.TabItem("⚖️ Compare Similarity"):
            gr.Markdown("Compare the semantic similarity between two texts.")
            with gr.Row():
                text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2)
                text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2)
            compare_btn = gr.Button("Compare Similarity", variant="primary")
            compare_output = gr.JSON(label="Similarity Result")
            compare_btn.click(
                fn=compare_texts,
                inputs=[text1_input, text2_input],
                outputs=compare_output
            )

        # Tab 3: Batch Embedding
        with gr.TabItem("📦 Batch Embed"):
            gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).")
            with gr.Row():
                with gr.Column():
                    batch_input = gr.Textbox(
                        label="Texts (one per line)",
                        placeholder="Text 1\nText 2\nText 3...",
                        lines=6
                    )
                    batch_btn = gr.Button("Generate Batch Embeddings", variant="primary")
                with gr.Column():
                    batch_output = gr.JSON(label="Batch Results")
            batch_btn.click(
                fn=batch_embed,
                inputs=batch_input,
                outputs=batch_output
            )

    gr.Markdown("""
    ---
    ### About ATLES-ECHO

    ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life.

    **Features:**
    - 🧠 High-quality semantic embeddings (768 dimensions)
    - ⚡ Fast inference with normalized vectors
    - 🎯 Top-10 MTEB benchmark performance
    - 🔒 Built for the ATLES privacy-first ecosystem

    [View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806)
    """)
# Start serving the interface only when this file is executed as a script;
# importing the module builds `demo` but does not launch it.
if __name__ == "__main__":
    demo.launch()