|
|
| import faiss |
| import gradio as gr |
| import numpy as np |
| import pandas as pd |
| from sentence_transformers import SentenceTransformer |
| import zipfile |
| import os |
| import logging |
|
|
# Root logger reports errors only. With this level the logging.info(...)
# progress messages further down are not emitted.
logging.basicConfig(level=logging.ERROR)

# Unpack the bundled archive into the current working directory.
# NOTE(review): presumably this provides faiss.index and faiss.lookup.csv,
# which FaissIndex loads by default -- confirm against the archive contents.
with zipfile.ZipFile("files.zip", mode="r") as z:
    z.extractall()

# The embedding model is pinned to a pull-request ref of the model repo.
pr_number = 14
logging.info("Loading embedding model")
model = SentenceTransformer(
    "intfloat/multilingual-e5-small",
    backend="openvino",
    revision=f"refs/pr/{pr_number}",
)
|
|
class FaissIndex:
    """Semantic FAQ lookup backed by a FAISS index and a CSV lookup table.

    Rows of the lookup table are fetched by position (``DataFrame.iloc``)
    using the labels returned by the FAISS index, so row ``i`` of the CSV
    must describe vector ``i`` of the index.
    """

    def __init__(
        self,
        model: "SentenceTransformer",
        data_path: str = "faiss.lookup.csv",
        index_path: str = "faiss.index",
    ):
        """Load the lookup table and the FAISS index from disk.

        Args:
            model: Embedding model used to encode text queries.
            data_path: CSV file containing at least an ``answer`` column.
            index_path: Path of the serialized FAISS index.
        """
        self.model = model
        self.df = pd.read_csv(data_path)
        self.index = faiss.read_index(index_path)

    def search_vectors(self, query, k: int = 5):
        """Run a nearest-neighbour search for pre-computed embeddings.

        This is the vector-level search that was previously also named
        ``search`` and therefore silently replaced by the text-level
        ``search`` defined later in the class.

        Args:
            query: One embedding (1-D) or a batch of embeddings (2-D).
            k: Number of neighbours to request per embedding.

        Returns:
            The ``(distances, indices)`` pair returned by the FAISS index.
        """
        vectors = np.asarray(query, dtype="float32")
        if vectors.ndim == 1:
            # FAISS expects a 2-D batch, even for a single vector.
            vectors = vectors.reshape(1, -1)
        distances, indices = self.index.search(vectors, k)
        return distances, indices

    def extract_docs(self, indices, k: int):
        """Map FAISS labels to the distinct answers of the lookup table.

        Args:
            indices: 2-D label array as returned by an index search; only
                the first row (first query) is used.
            k: Unused; kept so existing callers keep working.

        Returns:
            list[str]: Answers in ranking order, each answer at most once.
        """
        # FAISS pads with -1 when it finds fewer than k neighbours; iloc
        # would read -1 as "last row" and return an unrelated answer.
        rows = [int(label) for label in indices[0] if label >= 0]
        answers = self.df.iloc[rows]["answer"].values
        # dict.fromkeys removes duplicates while keeping first-seen order.
        return [f"{answer}" for answer in dict.fromkeys(answers)]

    def search(self, query: str, k: int = 5):
        """Return the distinct answers closest to a free-text query.

        Args:
            query: Search text entered by the user.
            k: Number of neighbours to request from the index.

        Returns:
            list[str]: Up to ``k`` answers, best match first.
        """
        # The "query: " prefix follows the E5 model family's input format.
        embedding = self.model.encode(["query: " + query])
        _, indices = self.search_vectors(embedding, k)
        return self.extract_docs(indices, k)
|
|
|
|
# Build the shared search index once at import time, using the default
# lookup-table and index paths of FaissIndex.
logging.info("Loading FAISS index")
index = FaissIndex(model=model)
|
|
|
|
def query_faiss_index(søketekst):
    """Query the FAISS index with the search text and format the answers.

    Args:
        søketekst (str): The search text to query the FAISS index with.

    Returns:
        str | None: The answers found for the two nearest matches, separated
        by double newlines, or ``None`` when the input is missing or shorter
        than three characters (no search is run in that case).
    """
    # Skip the lookup while the input is missing or too short to search.
    if søketekst is None or len(søketekst) < 3:
        return None

    results = index.search(søketekst, k=2)
    return "\n\n".join(results)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Gradio UI: a search field on one row and a read-out of the answers below.
with gr.Blocks() as blocks:
    gr.Markdown("## SIKT-FAQ")

    with gr.Row():
        box_search = gr.Textbox(
            label="Søk etter informasjon i SIKT",
            lines=1,
            placeholder="Innlogging i FEIDE...",
            interactive=True,
        )

    with gr.Row():
        box_output = gr.Textbox(
            label="Søkeresultater",
            type="text",
            lines=20,
        )

    # Re-run the search whenever the content of the search field changes.
    box_search.change(
        fn=query_faiss_index,
        inputs=box_search,
        outputs=box_output,
        max_batch_size=1,
    )


# Start the web server for the app.
blocks.launch()
| |
| |
| |
|
|