Spaces:

Sergidev
/

3dembed

Sleeping

App Files Community

Sergidev committed on Aug 20, 2024

Commit

5a0b505

verified ·

1 Parent(s): 35042da

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -38

app.py CHANGED Viewed

@@ -1,51 +1,58 @@
 import gradio as gr
 import plotly.graph_objects as go
-import hashlib
-def simple_embedding(text, dim=3):
-    """A simple hash-based embedding function for demonstration purposes."""
-    hash_value = hashlib.md5(text.encode()).hexdigest()
-    return [int(hash_value[i:i+2], 16) / 255.0 for i in range(0, dim*2, 2)]
-def compare_embeddings(*texts):
-    embeddings = [simple_embedding(text) for text in texts if text.strip()]  # Only process non-empty texts
-    fig = go.Figure()
-    colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta', 'yellow']
-    for i, emb in enumerate(embeddings):
-        color = colors[i % len(colors)]
-        fig.add_trace(go.Scatter3d(
-            x=[0, emb[0]], y=[0, emb[1]], z=[0, emb[2]],
-            mode='lines+markers',
-            name=f'Text {i+1}',
-            line=dict(color=color),
-            marker=dict(color=color)
-        ))
     fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
     return fig
-with gr.Blocks() as iface:
-    gr.Markdown("# 3D Embedding Comparison (Simplified)")
-    gr.Markdown("Compare simplified embeddings of multiple strings visualized in 3D space.")
-    gr.Markdown("Note: This is a demonstration using a basic hash-based embedding, not a real NLP model.")
-    with gr.Row():
-        num_inputs = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Number of texts to compare")
-    with gr.Row() as text_container:
-        text_inputs = [gr.Textbox(label=f"Text {i+1}") for i in range(2)]
-    output = gr.Plot()
-    submit_btn = gr.Button("Compare Embeddings")
-    def update_text_inputs(num):
-        return {text_container: gr.Row.update(children=[gr.Textbox(label=f"Text {i+1}") for i in range(num)])}
-    num_inputs.change(fn=update_text_inputs, inputs=[num_inputs], outputs=[text_container])
-    submit_btn.click(fn=compare_embeddings, inputs=text_container.children, outputs=output)
 iface.launch()

 import gradio as gr
+import spaces
+import torch
+from transformers import AutoTokenizer, AutoModel
 import plotly.graph_objects as go
+model_name = "mistralai/Mistral-7B-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = None
+# Set pad token to eos token if not defined
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+@spaces.GPU
+def get_embedding(text):
+    global model
+    if model is None:
+        model = AutoModel.from_pretrained(model_name).cuda()
+        model.resize_token_embeddings(len(tokenizer))
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to('cuda')
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
+def reduce_to_3d(embedding):
+    return embedding[:3]
+@spaces.GPU
+def compare_embeddings(text1, text2):
+    emb1 = get_embedding(text1)
+    emb2 = get_embedding(text2)
+    emb1_3d = reduce_to_3d(emb1)
+    emb2_3d = reduce_to_3d(emb2)
+    fig = go.Figure(data=[
+        go.Scatter3d(x=[0, emb1_3d[0]], y=[0, emb1_3d[1]], z=[0, emb1_3d[2]], mode='lines+markers', name='Text 1'),
+        go.Scatter3d(x=[0, emb2_3d[0]], y=[0, emb2_3d[1]], z=[0, emb2_3d[2]], mode='lines+markers', name='Text 2')
+    ])
     fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
     return fig
+iface = gr.Interface(
+    fn=compare_embeddings,
+    inputs=[
+        gr.Textbox(label="Text 1"),
+        gr.Textbox(label="Text 2")
+    ],
+    outputs=gr.Plot(),
+    title="3D Embedding Comparison",
+    description="Compare the embeddings of two strings visualized in 3D space using Mistral 7B."
+)
 iface.launch()