|
|
import streamlit as st |
|
|
import tensorflow as tf |
|
|
import os |
|
|
import numpy as np |
|
|
from scipy.spatial.distance import cosine |
|
|
|
|
|
from two_tower_model import TwoTowerModel |
|
|
|
|
|
# Paths to the serialized Two-Tower artifacts, resolved relative to the
# working directory the app is launched from (expects a `src/` folder).
TWO_TOWER_MODEL_SAVE_PATH = os.path.join("src","two_tower_model.keras")
TWO_TOWER_VOCAB_PATH = os.path.join("src","two_tower_vocabulary.json")
TWO_TOWER_EMBEDDING_PATH = os.path.join("src","two_tower_all_embeddings.npy")

# Streamlit page chrome. The titles/labels are user-facing Turkish text
# ("Word Similarity Application" / "Word Similarity Search").
st.set_page_config(page_title="Kelime Benzerliği Uygulaması", layout="wide")

st.title("Kelime Benzerliği Arama")
st.write("Girdiğiniz kelimeye en yakın kelimeleri bulmak için modelleri kullanın.")
|
|
|
|
|
def load_vocabulary(vocab_path: str) -> list[str]:
    """Read a JSON-encoded word list from *vocab_path*.

    The file is expected to contain a single JSON array of strings;
    whatever ``json.load`` produces is returned unchanged.
    """
    import json

    with open(vocab_path, 'r', encoding='utf-8') as vocab_file:
        vocabulary = json.load(vocab_file)
    return vocabulary
|
|
|
|
|
@st.cache_resource
def load_models_and_vocab():
    """Load the Two-Tower model, its vocabulary and precomputed embeddings.

    Cached by Streamlit so the heavy artifacts are only read once per
    session. Each artifact is loaded independently: a failure is reported
    via ``st.error`` and the corresponding slot is left as ``None`` (model,
    embeddings) or ``[]`` (vocabulary) so the UI can degrade gracefully.

    Returns:
        tuple: ``(model_or_None, vocabulary_list, embeddings_or_None)``.
    """
    # NOTE(review): tensorflow is already imported at module level, so this
    # env var likely arrives too late to silence most TF C++ logging —
    # confirm whether it should move above the `import tensorflow` line.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    model = None
    vocab = []
    embeddings = None

    # 1) Vocabulary (JSON list of words).
    try:
        vocab = load_vocabulary(TWO_TOWER_VOCAB_PATH)
    except Exception as e:
        st.error(f"Two-Tower kelime dağarcığı yüklenirken hata oluştu: {e}.")
        vocab = []

    # 2) Precomputed embeddings for every vocabulary entry (.npy array).
    try:
        embeddings = np.load(TWO_TOWER_EMBEDDING_PATH)
    except Exception as e:
        st.error(f"Kelime embeddingleri yüklenirken hata oluştu: {e}.")
        embeddings = None

    # 3) The Keras model itself; the custom class must be registered for
    #    deserialization to succeed.
    try:
        model = tf.keras.models.load_model(
            TWO_TOWER_MODEL_SAVE_PATH,
            custom_objects={"TwoTowerModel": TwoTowerModel},
        )
    except Exception as e:
        st.error(f"Two-Tower modeli yüklenirken hata oluştu: {e}.")

    return model, vocab, embeddings
|
|
|
|
|
two_tower_model, vocabulary, all_vocab_embeddings = load_models_and_vocab() |
|
|
|
|
|
def find_similar_words_two_tower(query_word, two_tower_model, vocabulary, all_vocab_embeddings, top_n=10):
    """Return the ``top_n`` words most cosine-similar to *query_word*.

    Args:
        query_word: Word to search for. If it is not in *vocabulary*, a
            warning is shown and the embedding of ``vocabulary[0]`` is used
            as a fallback query (results are then unreliable).
        two_tower_model: Loaded model exposing ``get_embedding(tensor)``.
        vocabulary: List of words, index-aligned with *all_vocab_embeddings*.
        all_vocab_embeddings: Precomputed embedding per vocabulary word
            (one row per word; rows are flattened before comparison).
        top_n: Maximum number of results to return.

    Returns:
        list[tuple[str, float]]: ``(word, cosine_similarity)`` pairs sorted
        by similarity (descending), with the query word itself excluded.
        Empty list if any required artifact failed to load.
    """
    if two_tower_model is None or not vocabulary or all_vocab_embeddings is None:
        st.warning("Two-Tower modeli veya kelime dağarcığı/gömmeler yüklenemedi.")
        return []

    if query_word not in vocabulary:
        st.warning(f"''{query_word}'' kelimesi kelime dağarcığında bulunmuyor. Benzerlikler doğru olmayabilir.")
        query_embedding = two_tower_model.get_embedding(tf.constant([vocabulary[0]]))
    else:
        query_embedding = two_tower_model.get_embedding(tf.constant([query_word]))

    # Vectorized cosine similarity: one matrix product instead of a Python
    # loop with a scipy call per vocabulary word (was O(n) native calls).
    query_vec = query_embedding.numpy().flatten()
    emb_matrix = np.asarray(all_vocab_embeddings).reshape(len(vocabulary), -1)
    # cos(q, w) = (q . w) / (|q| * |w|); zero-norm rows yield nan, matching
    # scipy.spatial.distance.cosine's behavior for zero vectors.
    dots = emb_matrix @ query_vec
    norms = np.linalg.norm(emb_matrix, axis=1) * np.linalg.norm(query_vec)
    sims = dots / norms

    similarities = list(zip(vocabulary, sims.tolist()))
    similarities.sort(key=lambda pair: pair[1], reverse=True)
    similarities = [s for s in similarities if s[0] != query_word]
    return similarities[:top_n]
|
|
|
|
|
# --- UI: pick a word from the vocabulary and show its nearest neighbours ---
# selectbox options come from the loaded vocabulary; empty if loading failed.
query_word_input = st.selectbox("Kelime Seçin", options=vocabulary)

if query_word_input:
    st.subheader(f"''{query_word_input}'' kelimesi için Benzerlik Sonuçları")

    st.markdown("#### Two-Tower Model Sonuçları")
    two_tower_results = find_similar_words_two_tower(query_word_input, two_tower_model, vocabulary, all_vocab_embeddings)
    if two_tower_results:
        # Each result is a (word, cosine-similarity) pair, best match first.
        for word, score in two_tower_results:
            st.write(f"{word}: {score:.4f}")
    else:
        # Either nothing matched or an artifact failed to load upstream.
        st.write("Two-Tower modeli için benzer kelime bulunamadı veya model/kelime dağarcığı/embeddingler yüklenemedi.")
|
|
|
|
|
|