# Word Embedding Demo App (HuggingFace Spaces / Gradio)
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Pre-trained 300-d Google News word2vec vectors; large download on first run.
# The commented alternative is a much smaller model useful for local testing.
model = gensim.downloader.load("word2vec-google-news-300") #glove-wiki-gigaword-50
# Fixed path the plot is written to; returned to Gradio as the output image.
cache = "/home/user/app/d.png"
# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project high-dimensional vectors down to 2-D.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Row vectors to reduce.
    method : {'PCA', 'TSNE'}
        Dimensionality-reduction algorithm to use.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)

    Raises
    ------
    ValueError
        If `method` is not 'PCA' or 'TSNE'.
    """
    # Use a name other than `model` to avoid shadowing the module-level
    # word2vec model.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        # Previously an unknown method fell through to an UnboundLocalError;
        # fail with an explicit, descriptive error instead.
        raise ValueError(f"Unknown reduction method: {method!r}; expected 'PCA' or 'TSNE'")
    return reducer.fit_transform(data)
# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Scatter-plot 2-D points with text labels and save the figure to `cache`.

    Parameters
    ----------
    reduced_data : numpy.ndarray of shape (n_points, 2)
        2-D coordinates, one row per word.
    labels : iterable of str
        One label per point, annotated next to its marker.
    title : str
        Plot title.
    """
    fig = plt.figure(figsize=(10, 8))
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
    for i, label in enumerate(labels):
        # Leading space nudges the text slightly away from its marker.
        plt.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
    plt.title(title)
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.savefig(cache) #, dpi=300)
    # Close the figure so repeated Gradio calls don't accumulate open
    # matplotlib figures (memory leak in a long-running Space).
    plt.close(fig)
# Markdown shown above the Gradio interface (rendered by gr.Interface).
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is the 10 words most correlated with Word 1. All words must be entered in small caps.
Credits:
* Gensim
* Word2Vec
"""
# Input textbox for the query word and output image slot for the saved plot.
Word1 = gr.Textbox()
sp = gr.Image()
def inference(word1):
    """Find the 10 nearest neighbours of `word1`, plot them in 2-D, return the plot path.

    Parameters
    ----------
    word1 : str
        Query word; must exist in the model's vocabulary (lower-case for
        this model).

    Returns
    -------
    str
        Path of the saved PCA scatter plot (the module-level `cache` path).
    """
    # (word, similarity) pairs for the 10 most similar words.
    # NOTE(review): similar_by_vector is given a key, not a vector — gensim
    # accepts this because it delegates to most_similar; similar_by_word
    # would state the intent more directly.
    output = model.similar_by_vector(word1)
    print(output)
    # Query word first, followed by its neighbours (scores discarded).
    # The original wrapped `output` in a redundant identity comprehension.
    word_list = [word1] + [neighbour for neighbour, _score in output]
    # Map each word to its embedding vector.
    words = {key: model[key] for key in word_list}
    # Stack the vectors into an (n_words, dim) matrix.
    data = np.stack(list(words.values()))
    print(data.shape)
    labels = list(words.keys())
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
# Click-to-run example inputs shown under the interface.
examples = [
    ["liver"],
    ["brainstem"],
    ["lungs"]
]
# Wire the inference function to the textbox input and image output.
iface = gr.Interface(
    fn=inference,
    inputs=[Word1],
    outputs=sp,
    description=description,
    examples=examples
)
iface.launch()