File size: 3,456 Bytes
b5e306d c59fbd2 b5e306d c712625 b5e306d b85095d b5e306d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
import tensorflow as tf
import os
import numpy as np
from scipy.spatial.distance import cosine
from two_tower_model import TwoTowerModel
# Paths to the Two-Tower artifacts produced by training, all kept under src/:
# the serialized Keras model, the JSON word vocabulary, and the precomputed
# embedding matrix for every vocabulary word (NumPy .npy).
TWO_TOWER_MODEL_SAVE_PATH = os.path.join("src","two_tower_model.keras")
TWO_TOWER_VOCAB_PATH = os.path.join("src","two_tower_vocabulary.json")
TWO_TOWER_EMBEDDING_PATH = os.path.join("src","two_tower_all_embeddings.npy")
# Streamlit page setup. UI text is Turkish: "Word Similarity Application" /
# "Word Similarity Search" / "Use the models to find the words closest to the
# word you enter."
st.set_page_config(page_title="Kelime Benzerliği Uygulaması", layout="wide")
st.title("Kelime Benzerliği Arama")
st.write("Girdiğiniz kelimeye en yakın kelimeleri bulmak için modelleri kullanın.")
def load_vocabulary(vocab_path: str) -> list[str]:
    """Load the word vocabulary from a UTF-8 encoded JSON file.

    Args:
        vocab_path: Path to a JSON file containing a list of words.

    Returns:
        The vocabulary as a list of strings.
    """
    import json

    with open(vocab_path, encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
@st.cache_resource
def load_models_and_vocab():
    """Load the Two-Tower model, its vocabulary, and precomputed embeddings.

    Each artifact is loaded independently: if one fails, the error is shown
    via st.error and that slot falls back to its default (None or []), so
    the app can still render with whatever did load. Cached by Streamlit so
    the loading happens once per server process.

    Returns:
        A (model, vocabulary, embeddings) tuple; any element may be the
        fallback value if loading failed.
    """
    # Silence TensorFlow's C++ logging before touching the model.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    model = None
    vocab = []
    embeddings = None

    try:
        vocab = load_vocabulary(TWO_TOWER_VOCAB_PATH)
    except Exception as e:
        st.error(f"Two-Tower kelime dağarcığı yüklenirken hata oluştu: {e}.")
        vocab = []

    try:
        embeddings = np.load(TWO_TOWER_EMBEDDING_PATH)
    except Exception as e:
        st.error(f"Kelime embeddingleri yüklenirken hata oluştu: {e}.")
        embeddings = None

    try:
        # The custom layer class must be registered for Keras deserialization.
        model = tf.keras.models.load_model(
            TWO_TOWER_MODEL_SAVE_PATH,
            custom_objects={"TwoTowerModel": TwoTowerModel},
        )
    except Exception as e:
        st.error(f"Two-Tower modeli yüklenirken hata oluştu: {e}.")

    return model, vocab, embeddings
# Load (and cache) all artifacts once at app start-up; any of these may be
# None / [] if the corresponding file failed to load.
two_tower_model, vocabulary, all_vocab_embeddings = load_models_and_vocab()
def find_similar_words_two_tower(query_word, two_tower_model, vocabulary, all_vocab_embeddings, top_n=10):
    """Find the top_n vocabulary words most similar to query_word.

    Similarity is cosine similarity between the query's embedding (from the
    Two-Tower model) and each precomputed vocabulary embedding. Computed in
    one vectorized NumPy pass instead of the previous per-word Python loop
    over scipy.spatial.distance.cosine (O(V) Python calls -> one matmul).

    Args:
        query_word: The word to search for.
        two_tower_model: Loaded model exposing get_embedding(tf tensor).
        vocabulary: List of vocabulary words, index-aligned with embeddings.
        all_vocab_embeddings: Array of shape (len(vocabulary), dim).
        top_n: Maximum number of results to return.

    Returns:
        A list of (word, similarity) pairs sorted by descending similarity,
        excluding the query word itself; empty list if artifacts are missing.
    """
    if two_tower_model is None or not vocabulary or all_vocab_embeddings is None:
        st.warning("Two-Tower modeli veya kelime dağarcığı/gömmeler yüklenemedi.")
        return []
    if query_word not in vocabulary:
        # Fall back to the first vocabulary word so the UI can still show
        # something; the warning tells the user results may be inaccurate.
        st.warning(f"''{query_word}'' kelimesi kelime dağarcığında bulunmuyor. Benzerlikler doğru olmayabilir.")
        query_embedding = two_tower_model.get_embedding(tf.constant([vocabulary[0]]))
    else:
        query_embedding = two_tower_model.get_embedding(tf.constant([query_word]))

    # Vectorized cosine similarity: sims[i] = e_i . q / (|e_i| * |q|),
    # identical to 1 - scipy cosine distance for each row.
    query_vec = query_embedding.numpy().reshape(-1)
    emb_matrix = np.asarray(all_vocab_embeddings)
    emb_matrix = emb_matrix.reshape(emb_matrix.shape[0], -1)
    denom = np.linalg.norm(emb_matrix, axis=1) * np.linalg.norm(query_vec)
    sims = (emb_matrix @ query_vec) / denom

    similarities = sorted(zip(vocabulary, sims.tolist()), key=lambda x: x[1], reverse=True)
    # Drop the query word itself before truncating to top_n.
    similarities = [s for s in similarities if s[0] != query_word]
    return similarities[:top_n]
# Word-selection UI: the user picks a query word from the vocabulary and the
# app lists the most similar words according to the Two-Tower model.
query_word_input = st.selectbox("Kelime Seçin", options=vocabulary)
if query_word_input:
    st.subheader(f"''{query_word_input}'' kelimesi için Benzerlik Sonuçları")
    st.markdown("#### Two-Tower Model Sonuçları")
    two_tower_results = find_similar_words_two_tower(
        query_word_input, two_tower_model, vocabulary, all_vocab_embeddings
    )
    if not two_tower_results:
        # Either no neighbors were found or the artifacts failed to load.
        st.write("Two-Tower modeli için benzer kelime bulunamadı veya model/kelime dağarcığı/embeddingler yüklenemedi.")
    else:
        for similar_word, similarity_score in two_tower_results:
            st.write(f"{similar_word}: {similarity_score:.4f}")
|