File size: 2,060 Bytes
db01b5c
 
 
 
 
 
 
38636da
db01b5c
7e4e303
db01b5c
 
 
 
 
 
3eaacbe
db01b5c
 
 
 
 
 
 
88cf807
db01b5c
3259e38
72de3b8
db01b5c
 
 
7e4e303
db01b5c
 
 
 
 
bc9b15a
db01b5c
 
 
6aa5b29
db01b5c
 
 
12c38a8
db01b5c
 
3259e38
 
 
db01b5c
3259e38
750b074
db01b5c
 
 
 
524d13b
db01b5c
524d13b
db01b5c
 
 
46120cb
 
3259e38
db01b5c
 
 
 
bc9b15a
db01b5c
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gensim.downloader 
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Load pretrained 300-dimensional Google News word2vec vectors via the gensim
# downloader (large download on first run; cached afterwards). The trailing
# comment names a much smaller alternative model for quicker experiments.
model = gensim.downloader.load("word2vec-google-news-300") #glove-wiki-gigaword-50

# Path where the generated scatter plot is saved; returned by inference()
# so Gradio can display it. NOTE(review): hard-coded to the HF Spaces layout.
cache = "/home/user/app/d.png"

# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project high-dimensional vectors down to 2-D for plotting.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Embedding vectors to reduce.
    method : str, default 'PCA'
        Reduction algorithm: 'PCA' or 'TSNE'.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)

    Raises
    ------
    ValueError
        If *method* is neither 'PCA' nor 'TSNE' (previously this path
        crashed with UnboundLocalError).
    """
    # Named `reducer` (not `model`) to avoid shadowing the module-level
    # word2vec model.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # perplexity must be < n_samples; 4 suits the ~11-point inputs used here.
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        raise ValueError(f"Unknown method {method!r}; expected 'PCA' or 'TSNE'")
    return reducer.fit_transform(data)

# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Scatter-plot 2-D reduced embeddings and save the figure to `cache`.

    Parameters
    ----------
    reduced_data : numpy.ndarray of shape (n_samples, 2)
        2-D coordinates, one row per word.
    labels : iterable of str
        Word labels, in the same order as the rows of *reduced_data*.
    title : str
        Plot title.

    Side effects
    ------------
    Writes a PNG to the module-level `cache` path. Returns None.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
        for i, label in enumerate(labels):
            # Leading spaces nudge the text off the marker.
            plt.annotate("  " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
        plt.title(title)
        plt.xlabel('Component 1')
        plt.ylabel('Component 2')
        plt.grid(True)
        plt.savefig(cache) #, dpi=300)
    finally:
        # Fix: figures were created per request and never closed, leaking
        # memory in a long-running Gradio app.
        plt.close(fig)

# Markdown shown above the Gradio interface (runtime text — unchanged).
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina

The output is the 10 words most correlated with Word 1. All words must be entered in small caps.

Credits:  
* Gensim
* Word2Vec
"""

# Input textbox for the query word and the output image component,
# wired into gr.Interface below.
Word1 = gr.Textbox()
sp = gr.Image()


def inference(word1):
    """Plot the query word and its 10 nearest neighbours in 2-D (PCA).

    Parameters
    ----------
    word1 : str
        Lower-case word expected to exist in the word2vec vocabulary.
        NOTE(review): an out-of-vocabulary word raises KeyError — confirm
        whether Gradio-level error display is acceptable here.

    Returns
    -------
    str
        Path of the saved scatter-plot image (module-level `cache`).
    """
    # Top-10 most similar words as (word, cosine_similarity) pairs.
    # NOTE(review): similar_by_vector also accepts a key string in gensim;
    # similar_by_word(word1) would be the clearer, equivalent call.
    output = model.similar_by_vector(word1)
    print(output)
    # Query word first, followed by its neighbours (was a doubly-nested
    # comprehension doing the same thing).
    word_list = [word1] + [word for word, _score in output]
    words = {key: model[key] for key in word_list}
    # Stack the embedding vectors into an (n_words, dim) matrix.
    data = np.stack(list(words.values()), axis=0)
    print(data.shape)
    labels = list(words.keys())
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache

# Example inputs shown beneath the interface (anatomy terms, lower-case
# as the description requires).
examples = [
    ["liver"],  
    ["brainstem"],
    ["lungs"]
]

# Wire inference() into a simple single-input / single-image Gradio UI.
iface = gr.Interface(
    fn=inference,
    inputs=[Word1],
    outputs=sp,
    description=description,
    examples=examples
    )

# Start the app (blocking call; serves the web UI).
iface.launch()