Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import faiss | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| from datasets import load_dataset | |
| from datasets import concatenate_datasets | |
# Load the phrase dataset from the Hugging Face Hub.
dataset = load_dataset("fitlemon/ostap-phrases")

# Access the train/test splits.
train_data = dataset["train"]
test_data = dataset["test"]

# Merge both splits into a single dataset so every row is a retrieval candidate.
dataset = concatenate_datasets([train_data, test_data])

# Materialise the columns as plain Python lists so that `encode()` and the
# positional lookup `answers[i]` operate on a concrete sequence regardless of
# the column type returned by the installed `datasets` version.
questions = list(dataset["question"])
answers = list(dataset["answer"])

# Initialise the embedding model.
model_bge = SentenceTransformer("fitlemon/bge-m3-ru-ostap")

# Compute embeddings for every answer.
# NOTE(review): the previous comments described embedding/indexing the
# *questions*, but the code encodes the *answers* — confirm which is intended.
answer_embeddings = model_bge.encode(answers, convert_to_numpy=True)

# Flat inner-product index wrapped in an ID map, so each stored vector carries
# an explicit integer id.
# NOTE(review): inner product only equals cosine similarity if the model emits
# normalised vectors — confirm.
index = faiss.IndexIDMap(faiss.IndexFlatIP(answer_embeddings.shape[1]))

# Use each answer's position in `answers` as its id. FAISS ids are 64-bit,
# while the default integer dtype of `np.arange` is platform-dependent, so the
# dtype is pinned explicitly.
index.add_with_ids(answer_embeddings, np.arange(len(answers), dtype=np.int64))
| import gradio as gr | |
| import time | |
# Build the chat UI: a message-style chatbot, an input box and a clear button.
with gr.Blocks() as app:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(
        label="Напиши свой вопрос Остапу Бендеру здесь...",
        placeholder="Привет, Остап!",
    )
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer a user message with the nearest stored phrase.

        The message is embedded with ``model_bge``, the single closest vector
        is looked up in ``index``, and the entry of ``answers`` at the
        returned id is used as the reply.

        Args:
            message: Text submitted through the textbox.
            chat_history: List of ``{"role", "content"}`` dicts; it is
                extended in place with the user turn and the assistant turn.

        Returns:
            A tuple ``("", chat_history)``. The outputs are wired to
            ``[msg, chatbot]``, so the empty string is what the textbox
            receives and the history is what the chatbot receives.
        """
        query_vector = model_bge.encode([message], convert_to_numpy=True)
        # Only the top-1 neighbour is requested; its score is not used.
        _scores, neighbour_ids = index.search(query_vector, 1)
        reply = answers[neighbour_ids[0, 0]]

        chat_history.extend(
            [
                {"role": "user", "content": message},
                {"role": "assistant", "content": reply},
            ]
        )

        # NOTE(review): fixed two-second pause before replying — presumably a
        # cosmetic "thinking" delay; confirm it is still wanted.
        time.sleep(2)
        return "", chat_history

    # Submitting the textbox runs `respond` and updates both components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()