Spaces:

Prathmesh48
/

Test_E5

Sleeping

Prathmesh48 commited on Jun 27, 2024

Commit

45b151c

verified ·

1 Parent(s): 3e578ec

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import streamlit as st
+import torch
+from transformers import AutoTokenizer, AutoModel
+# Load the tokenizer and model
+@st.cache_resource
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained('dwzhu/e5-base-4k', trust_remote_code=True)
+    model = AutoModel.from_pretrained('dwzhu/e5-base-4k', trust_remote_code=True)
+    model.to('cpu')
+    return tokenizer, model
+tokenizer, model = load_model()
+def extract_embeddings(text, tokenizer, model):
+    # Tokenize the input text
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    inputs = {k: v.to('cpu') for k, v in inputs.items()}
+    # Get the model's outputs
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Extract the embeddings (use the output of the last hidden state)
+    embeddings = outputs.last_hidden_state.mean(dim=1)
+    return embeddings.squeeze().cpu().numpy()
+# Streamlit app
+st.title("Text Embeddings Extractor")
+text = st.text_area("Enter text to extract embeddings:", "This is an example sentence.")
+if st.button("Extract Embeddings"):
+    embeddings = extract_embeddings(text, tokenizer, model)
+    st.write("Embeddings:")
+    st.write(embeddings)