# Hugging Face Space (Streamlit app): 3D word cloud of BERT fill-mask
# suggestions for a user-supplied keyword.
from functools import lru_cache

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import torch
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from transformers import BertModel, BertTokenizer, pipeline
# Load the pretrained (uncased English) BERT weights and the matching
# tokenizer once at import time; the helpers below share these globals.
modelo_bert = BertModel.from_pretrained('bert-base-uncased')
tokenizador_bert = BertTokenizer.from_pretrained('bert-base-uncased')
def obtener_representaciones(texto):
    """Return BERT's last hidden states for *texto*.

    Shape is (1, num_tokens, 768); special tokens ([CLS]/[SEP]) are included,
    as in the original implementation.

    Fix: the original round-tripped the text through
    encode -> decode -> tokenize -> convert_tokens_to_ids, which is slow and
    fragile; calling the tokenizer directly yields the same input ids (plus
    an attention mask) in one step.
    """
    entradas = tokenizador_bert(texto, return_tensors='pt')
    with torch.no_grad():  # inference only — no gradients needed
        salidas = modelo_bert(**entradas)
    return salidas.last_hidden_state
def calcular_similitud(representaciones):
    """Pairwise dot-product similarity matrix with self-similarity zeroed.

    Accepts any 2-D array-like (rows = items, columns = features) and
    returns an ndarray whose diagonal entries are 0.
    """
    producto = np.dot(representaciones, np.transpose(representaciones))
    # Keep every off-diagonal entry, force the diagonal to zero.
    fuera_de_diagonal = ~np.eye(producto.shape[0], dtype=bool)
    return np.where(fuera_de_diagonal, producto, 0)
@lru_cache(maxsize=1)
def _cargar_analizador_sentimientos():
    # Building a transformers pipeline loads model weights — do it only once.
    return pipeline("sentiment-analysis")


def analizar_sentimiento(texto):
    """Return (score, label) of the sentiment predicted for *texto*.

    Fix: the original constructed a fresh "sentiment-analysis" pipeline on
    every call, reloading the model each time; the pipeline is now built
    lazily once and memoized.
    """
    resultado = _cargar_analizador_sentimientos()(texto)
    return resultado[0]['score'], resultado[0]['label']
def obtener_tamano_color_relevancia_esferas(df, relevancia_minima):
    """Compute a marker size and colour for every row of *df*.

    A row's relevance is the maximum over its feature columns (all but the
    last 4, which are metadata). Size grows linearly with relevance; rows at
    or above *relevancia_minima* are drawn red, the rest blue.
    Returns (sizes, colors) as two parallel lists.
    """
    relevancias = [np.max(df.iloc[fila, :-4]) for fila in range(len(df))]
    tamanos = [2 + valor * 7 for valor in relevancias]
    colores = ['rgba(255, 0, 0, 0.8)' if valor >= relevancia_minima
               else 'rgba(0, 0, 255, 0.8)'
               for valor in relevancias]
    return tamanos, colores
def obtener_color_transparencia_lineas(df):
    """Return one (color, line_width, opacity) tuple per row pair i < j.

    Colours cycle through a fixed palette; the line width is the Euclidean
    distance between the two rows' feature columns (all but the last 4,
    which are metadata); opacity is a constant 0.6.

    Fix: the original also computed an average sentiment score per pair —
    instantiating TWO fresh transformers pipelines for every pair — and then
    discarded the value. That dead (and very expensive) computation is
    removed; the returned tuples are unchanged.
    """
    paleta = ['green', 'blue', 'yellow', 'red', 'purple', 'orange']
    resultado = []
    indice_color = 0
    for i in range(len(df)):
        for j in range(i + 1, len(df)):
            distancia = np.linalg.norm(df.iloc[i, :-4] - df.iloc[j, :-4])
            resultado.append((paleta[indice_color], distancia, 0.6))
            indice_color = (indice_color + 1) % len(paleta)
    return resultado
def generar_nube_palabras_3d(hashtags, relevancia_minima):
    """Build and render a 3D Plotly word cloud for the given words.

    hashtags: whitespace-separated string of words to plot.
    relevancia_minima: threshold used for sphere colouring (blue below,
        red at or above — see obtener_tamano_color_relevancia_esferas).

    Side effect: renders the figure into the Streamlit page via
    st.plotly_chart; returns None.
    """
    # One 768-dim vector per word: mean-pool BERT's token embeddings.
    representaciones = [obtener_representaciones(palabra).mean(dim=1).squeeze().numpy() for palabra in hashtags.split()]
    df = pd.DataFrame(np.vstack(representaciones), columns=[f'Feature_{i}' for i in range(768)])
    df['Hashtag'] = hashtags.split()
    pca = PCA(n_components=3)
    scaler = StandardScaler()
    # Standardise the 768 feature columns (everything but 'Hashtag') before PCA.
    df_scaled = scaler.fit_transform(df.iloc[:, :-1])
    pca_result = pca.fit_transform(df_scaled)
    df['PCA1'], df['PCA2'], df['PCA3'] = pca_result[:, 0], pca_result[:, 1], pca_result[:, 2]
    # iloc[:, :-4] drops the 4 metadata columns (Hashtag, PCA1..PCA3).
    # NOTE(review): similarity_matrix is never used below — dead code? confirm.
    similarity_matrix = calcular_similitud(df.iloc[:, :-4])
    sizes, colors = obtener_tamano_color_relevancia_esferas(df, relevancia_minima)
    color_and_opacity = obtener_color_transparencia_lineas(df)
    # Random "fourth dimension" used only to colour the scatter points.
    df['CuartaDim'] = np.random.rand(len(df))
    fig = px.scatter_3d(df, x='PCA1', y='PCA2', z='PCA3', text='Hashtag', color='CuartaDim',
                        title="Nube de Palabras 3D", opacity=0.9, width=1000, height=1000,
                        color_continuous_scale='Viridis')
    # One line per word pair; width comes from the embedding distance.
    # pop(0) consumes the tuples in the same i<j order they were generated.
    for i in range(len(df)):
        for j in range(i + 1, len(df)):
            color, width, opacity = color_and_opacity.pop(0)
            fig.add_trace(go.Scatter3d(x=[df.iloc[i]['PCA1'], df.iloc[j]['PCA1']],
                                       y=[df.iloc[i]['PCA2'], df.iloc[j]['PCA2']],
                                       z=[df.iloc[i]['PCA3'], df.iloc[j]['PCA3']],
                                       mode='lines',
                                       line=dict(width=width, color=color),
                                       opacity=opacity))
    # One near-transparent marker ("sphere") per word, sized by relevance.
    for i in range(len(df)):
        size = sizes[i]
        color = colors[i]
        fig.add_trace(go.Scatter3d(x=[df.iloc[i]['PCA1']],
                                   y=[df.iloc[i]['PCA2']],
                                   z=[df.iloc[i]['PCA3']],
                                   mode='markers',
                                   marker=dict(size=size, color=color, opacity=0.1),
                                   text=[df.iloc[i]['Hashtag']]))
    st.plotly_chart(fig)
def main():
    """Streamlit entry point: read a keyword, expand it with BERT's
    fill-mask head, and draw the suggested words as a 3D cloud."""
    st.title("Nube de Palabras 3D con BERT")
    termino = st.text_input("Ingrese una palabra clave")
    umbral = st.slider("Seleccione la relevancia mínima", 0.0, 1.0, 0.5, step=0.01)
    # The button is only rendered (and the cloud built) once a keyword exists.
    if not termino or not st.button("Generar Nube de Palabras"):
        return
    rellenador = pipeline('fill-mask', model='bert-base-uncased')
    sugerencias = rellenador(f"{termino} [MASK]", top_k=8)
    palabras = ' '.join(s['token_str'] for s in sugerencias)
    generar_nube_palabras_3d(palabras, umbral)
# Standard script entry point.
if __name__ == "__main__":
    main()