# Source: Hugging Face Space by felipekitamura — "Update app.py" (commit 7e4e303, verified)
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Load pretrained 300-d Google News word2vec vectors (large download on first
# run; the commented alternative is a much lighter GloVe model for testing).
model = gensim.downloader.load("word2vec-google-news-300") #glove-wiki-gigaword-50
# Path where the plot image is written and from which Gradio serves it.
cache = "/home/user/app/d.png"
# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project *data* onto 2 components with PCA or t-SNE.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Embedding vectors to project.
    method : str, default 'PCA'
        Either ``'PCA'`` or ``'TSNE'``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)

    Raises
    ------
    ValueError
        If *method* is neither ``'PCA'`` nor ``'TSNE'``.
    """
    # Use a local name distinct from the module-level word2vec `model`
    # so the global is never shadowed by accident.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # perplexity must be < n_samples; 4 suits the ~11-point plots used here.
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        # Previously an unknown method crashed with UnboundLocalError; fail clearly.
        raise ValueError(f"Unknown method: {method!r} (expected 'PCA' or 'TSNE')")
    return reducer.fit_transform(data)
# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Scatter-plot 2-D points with text labels and save the figure to `cache`.

    Parameters
    ----------
    reduced_data : numpy.ndarray of shape (n_samples, 2)
        2-D coordinates, one row per label.
    labels : iterable of str
        Text annotation for each point, in row order.
    title : str
        Plot title.
    """
    plt.figure(figsize=(10, 8))
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
    for i, label in enumerate(labels):
        # Leading space offsets the text slightly from its marker.
        plt.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
    plt.title(title)
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.savefig(cache)  #, dpi=300)
    # Close the figure: pyplot keeps every open figure alive, so without this
    # each Gradio request leaks a ~10x8-inch figure.
    plt.close()
# Markdown description rendered above the Gradio interface.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is the 10 words most correlated with Word 1. All words must be entered in small caps.
Credits:
* Gensim
* Word2Vec
"""
# Input widget: free-text box for the query word (expects lower-case input).
Word1 = gr.Textbox()
# Output widget: displays the saved PCA scatter-plot image.
sp = gr.Image()
def inference(word1):
    """Plot *word1* and its 10 nearest word2vec neighbours in 2-D via PCA.

    Parameters
    ----------
    word1 : str
        Query word; must exist in the word2vec vocabulary (lower-case).

    Returns
    -------
    str
        Path of the saved plot image (`cache`), consumed by the gr.Image output.
    """
    # (word, similarity) pairs for the 10 most similar words.
    output = model.similar_by_vector(word1)
    # Query word first, then its neighbours (scores are discarded).
    word_list = [word1]
    word_list.extend(word for word, _score in output)
    # Look up each word's embedding vector.
    words = {key: model[key] for key in word_list}
    # Stack the 1-D vectors into an (n_words, embedding_dim) matrix.
    data = np.stack(list(words.values()))
    labels = list(words.keys())
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
# Example queries shown beneath the interface (anatomy terms, lower-case).
examples = [
["liver"],
["brainstem"],
["lungs"]
]
# Wire the textbox input through `inference` to the image output.
iface = gr.Interface(
fn=inference,
inputs=[Word1],
outputs=sp,
description=description,
examples=examples
)
iface.launch()