Spaces:
Sleeping
Sleeping
# MiniLM Semantic FAQ Search — CPU-only HF Space
# Works out-of-the-box with faqs.csv in the same folder.
| import re | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
# ------- paths & model -------------------------------------------------
# The FAQ file is looked up next to this module rather than in the current
# working directory.
BASE_DIR = Path(__file__).parent
CSV_FILE = BASE_DIR.joinpath("faqs.csv")
# Checkpoint name handed to SentenceTransformer when the model is created.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# ------- load FAQ data -------------------------------------------------
# Fail early with an actionable message instead of pandas' generic error.
if not CSV_FILE.exists():
    raise FileNotFoundError(
        f"{CSV_FILE} missing. Make sure faqs.csv is in the repo root."
    )

faq_df = pd.read_csv(CSV_FILE)

# read_csv turns blank cells into NaN (a float) and numeric-looking cells into
# numbers. Both would break the text encoder and the regex tagger later, so
# drop incomplete rows and force the remaining cells to plain strings.
# A missing "question"/"answer" column still raises KeyError, as before.
faq_df = faq_df.dropna(subset=["question", "answer"]).reset_index(drop=True)
questions = faq_df["question"].astype(str).tolist()
answers = faq_df["answer"].astype(str).tolist()
# ------- embed questions ----------------------------------------------
# Every FAQ question is encoded once at import time; the resulting tensor is
# what incoming queries are compared against. Normalised embeddings are
# requested from the encoder.
model = SentenceTransformer(MODEL_NAME)
question_embs = model.encode(
    questions,
    normalize_embeddings=True,
    convert_to_tensor=True,
)
# ------- tiny emoji tagger --------------------------------------------
# Maps a keyword regex to the emoji shown next to a result. Rules are tried in
# insertion order and the first match wins, so put more specific rules first.
# Emoji are spelled as escape sequences so the table survives any re-encoding
# of this file.
EMOJI_RULES = {
    r"\b(shampoo|conditioner|mask)\b": "\U0001F9F4",  # lotion bottle
    r"\b(hair\s?spray|spray)\b": "\U0001F4A8",  # dash / puff of air
    r"\b(vegan|botanical|organic)\b": "\U0001F331",  # seedling
    r"\b(heat|thermal|hot)\b": "\U0001F525",  # fire
    r"\b(balayage|color|colour|dye)\b": "\U0001F487\u200D\u2640\uFE0F",  # woman getting a haircut
    r"\b(scissors|cut|trim)\b": "\u2702\uFE0F",  # scissors
}

# Returned when no rule matches.
# NOTE(review): the original fallback glyph was mis-encoded and could not be
# recovered with certainty; a question-mark emoji is assumed - confirm.
_FALLBACK_EMOJI = "\u2753"


def emoji_for(text: str) -> str:
    """Return the emoji of the first EMOJI_RULES pattern found in *text*.

    Matching is case-insensitive and respects word boundaries (as written in
    the patterns), so "hot" does not match inside "shotgun".

    Args:
        text: Text to tag. Non-string values (for example the NaN pandas
            produces for a blank CSV cell) are treated as "no match".

    Returns:
        The emoji of the first matching rule, or the fallback emoji when
        nothing matches.
    """
    # re.search raises TypeError on non-strings; treat them as untaggable.
    if not isinstance(text, str):
        return _FALLBACK_EMOJI
    for pattern, emo in EMOJI_RULES.items():
        if re.search(pattern, text, flags=re.IGNORECASE):
            return emo
    return _FALLBACK_EMOJI
# ------- search function ----------------------------------------------
def search_faq(query: str, top_k: int) -> pd.DataFrame:
    """Return the FAQ entries most similar to *query*.

    The query is embedded with the same model as the FAQ questions and ranked
    by cosine similarity against the pre-computed question embeddings.

    Args:
        query: Free-text question. None, empty or whitespace-only input
            yields an empty result table.
        top_k: Maximum number of rows to return. Converted to int and capped
            at the number of available FAQ entries.

    Returns:
        A DataFrame with columns "Emoji", "Question", "Answer" and "Score"
        (similarity rounded to 3 decimals), best match first.
    """
    columns = ["Emoji", "Question", "Answer", "Score"]

    # Treat None like an empty query instead of failing on .strip().
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    # topk needs an integer k that does not exceed the number of candidates;
    # the value comes from a UI slider, so coerce it and cap it at the FAQ size.
    k = min(int(top_k), len(questions))
    if k < 1:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode(query, convert_to_tensor=True, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, question_embs)[0]
    best = sims.topk(k=k)

    # topk already returns the scores alongside the indices, so there is no
    # need to index back into the similarity tensor per row.
    rows = [
        [emoji_for(answers[i]), questions[i], answers[i], round(score, 3)]
        for score, i in zip(best.values.tolist(), best.indices.tolist())
    ]
    return pd.DataFrame(rows, columns=columns)
| # ------- Gradio UI ----------------------------------------------------- | |
# Builds the Blocks app `demo`: a Markdown heading, a two-line question
# textbox, a result-count slider (1 to 5, step 1, default 3), a primary
# "Search" button and a non-interactive results table whose headers are
# Emoji / Question / Answer / Score.
# NOTE(review): indentation is missing here, so which components belong to the
# gr.Row() context cannot be read off the code - presumably the textbox and
# the slider; confirm before reformatting.
| with gr.Blocks(theme=gr.themes.Soft(), title="Semantic FAQ Search") as demo: | |
| gr.Markdown("# π Semantic FAQ Search") | |
| with gr.Row(): | |
| q_in = gr.Textbox( | |
| label="Ask a question", | |
| lines=2, | |
| placeholder="e.g. Which spray protects hair from heat?" | |
| ) | |
| k_in = gr.Slider(1, 5, value=3, step=1, label="Results") | |
| search_btn = gr.Button("Search", variant="primary") | |
| table_out = gr.Dataframe( | |
| headers=["Emoji", "Question", "Answer", "Score"], | |
| datatype=["str", "str", "str", "number"], | |
| wrap=True, | |
| interactive=False | |
| ) | |
# Clicking the button calls search_faq with the textbox and slider values and
# sends its return value to the results table.
| search_btn.click(search_faq, [q_in, k_in], table_out) | |
if __name__ == "__main__":
    # Only start the server when run as a script, not on import.
    # 0.0.0.0 makes it listen on every network interface.
    launch_options = {"server_name": "0.0.0.0"}
    demo.launch(**launch_options)