Vizznu19 commited on
Commit
4b9b98e
·
verified ·
1 Parent(s): 31a207e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -76
app.py CHANGED
@@ -1,90 +1,75 @@
1
import pandas as pd
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from sentence_transformers import SentenceTransformer

import faiss
import numpy as np
import re

# FastAPI application exposing the semantic FAQ search endpoints below.
app = FastAPI(title="Bank FAQ Assistant", description="A semantic search FAQ system")

# Allow CORS for local frontend.
# NOTE(review): browsers reject credentialed requests when allow_origins is
# the wildcard "*" — if cookies/auth are actually needed, list explicit
# origins; confirm against the frontend's requirements.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables for lazy initialization: populated once by
# initialize_model() on the first /search request so startup stays fast.
model = None              # SentenceTransformer instance
chunk_index = None        # FAISS L2 index over answer-sentence embeddings
chunked_questions = None  # parent question for each indexed chunk
chunked_answers = None    # full parent answer for each indexed chunk
def initialize_model():
    """Lazily load the FAQ data, embedding model, and FAISS index.

    Runs the expensive setup only on the first call; subsequent calls are
    no-ops because the ``model`` global is already populated.
    """
    global model, chunk_index, chunked_questions, chunked_answers

    if model is not None:
        return

    # Load the FAQ dataset.
    frame = pd.read_csv("BankFAQs.csv", usecols=["Question", "Answer"])
    question_list = frame["Question"].astype(str).tolist()
    answer_list = frame["Answer"].astype(str).tolist()

    # Split text into sentence-sized chunks on terminal punctuation.
    splitter = re.compile(r'(?<=[.!?]) +')

    def split_sentences(text):
        return [piece.strip() for piece in splitter.split(text) if piece.strip()]

    # One entry per answer sentence, each remembering its parent Q and A.
    chunked_questions, chunks, chunked_answers = [], [], []
    for question, answer in zip(question_list, answer_list):
        for piece in split_sentences(answer):
            chunked_questions.append(question)
            chunks.append(piece)
            chunked_answers.append(answer)

    # Embed every chunk and build an exact L2 index over the vectors.
    model = SentenceTransformer("all-MiniLM-L6-v2")
    vectors = np.array(model.encode(chunks)).astype("float32")
    chunk_index = faiss.IndexFlatL2(vectors.shape[1])
    chunk_index.add(vectors)
class QueryRequest(BaseModel):
    """Request body for /search: a free-text query plus result count."""
    query: str  # user question to match against the FAQ chunks
    k: int = 1  # number of nearest chunks to retrieve from the index
@app.get("/")
def root():
    """Landing endpoint: confirms the service is up and points at /search."""
    message = "Bank FAQ Assistant is running. Use /search endpoint to query."
    return {"message": message}
@app.post("/search")
async def search_faq(req: QueryRequest):
    """Semantic search over the chunked FAQ answers.

    Retrieves the ``req.k`` nearest answer chunks for ``req.query`` and
    returns the parent question and full answer of each sufficiently
    similar hit. Multiple chunks of the same answer are collapsed into a
    single result.
    """
    # Build the model and index lazily on the first request.
    initialize_model()

    query_embedding = model.encode([req.query]).astype("float32")
    D, I = chunk_index.search(query_embedding, req.k)

    # Cosine similarity recovered from squared L2 distance:
    # cosine_sim = 1 - d^2 / 2.
    # NOTE(review): this identity only holds for unit-normalized vectors;
    # the encoder is not asked to normalize its output — confirm, or pass
    # normalize_embeddings=True when encoding.
    similarities = 1 - (D[0] / 2)
    threshold = 0.6

    results = []
    seen_questions = set()
    for idx, sim in zip(I[0], similarities):
        # FAISS pads with -1 when the index holds fewer than k vectors; the
        # old code let -1 through and silently returned the *last* chunk.
        if idx < 0 or sim < threshold:
            continue
        parent = chunked_questions[idx]
        if parent in seen_questions:
            continue  # several sentences of one answer: report it once
        seen_questions.add(parent)
        results.append({
            "question": parent,
            "full_answer": chunked_answers[idx]
        })
    return {"results": results}
@app.get("/health")
def health_check():
    """Liveness probe for deployment health checks."""
    payload = {"status": "healthy", "message": "FAQ Assistant is ready"}
    return payload
1
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load the FAQ dataset (expects "Question" and "Answer" columns).
df = pd.read_csv("samsung_led_tv_faq_500.csv")

# Load the pretrained embedding model once; it is reused for indexing and
# for encoding user queries later in this script.
model = SentenceTransformer('all-MiniLM-L6-v2')

# NOTE: a full embedding pass over the raw (still-duplicated) questions was
# previously computed here and immediately discarded after deduplication
# below; that wasted pass and the notebook-style bare expressions
# (df.head(), question_embeddings) have been removed. Embeddings are
# generated once, after deduplication.
import faiss
import numpy as np

# Drop exact duplicate questions so the index holds one vector per question.
df = df.drop_duplicates(subset='Question').reset_index(drop=True)
print(f"Total unique questions: {len(df)}")

# Embed the cleaned question list. The model loaded above is reused — the
# previous re-import and second SentenceTransformer(...) instantiation were
# redundant and reloaded the whole model from disk.
# FAISS requires contiguous float32 vectors, hence the explicit cast.
question_embeddings = model.encode(df['Question'].tolist(), show_progress_bar=True)
question_embeddings = np.array(question_embeddings).astype("float32")
# Build a FAISS index over the question embeddings (exact L2 search).
import faiss
embedding_dim = question_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(question_embeddings)
def search_faq(query, k=3):
    """Return the k nearest FAQ entries for a free-text query.

    Parameters:
        query: user question (str).
        k: number of neighbors to retrieve (default 3).

    Returns:
        List of (question, answer, distance) tuples, nearest first. May be
        shorter than k when the index holds fewer vectors.
    """
    query_embedding = model.encode([query]).astype("float32")
    D, I = index.search(query_embedding, k)
    results = []
    for dist, i in zip(D[0], I[0]):
        # FAISS pads missing neighbors with -1. The old `i < len(df)` guard
        # let -1 through, and df.iloc[-1] silently returned the *last* row
        # as a fake match — require a genuinely valid row index.
        if 0 <= i < len(df):
            # float(dist) converts numpy float32 to a plain Python float.
            results.append((df.iloc[i]['Question'], df.iloc[i]['Answer'], float(dist)))
    return results
# Quick smoke test of the retriever with a sample question.
query = "Can I mount the TV on a wall? (model UA48TU7069)"
results = search_faq(query)

print(f"Query: {query}\n")
# Show each hit with its L2 distance (smaller = closer).
for q, a, d in results:
    print(f"Matched Q: {q}\nAnswer: {a}\nDistance: {d:.4f}\n")
+ import gradio as gr
48
+ from gtts import gTTS
49
+ import os
50
 
51
def gradio_interface(query):
    """Gradio handler: answer a TV FAQ query and speak the answer aloud.

    Parameters:
        query: user question from the textbox (str).

    Returns:
        Path to an MP3 file containing the spoken answer, for gr.Audio.
    """
    results = search_faq(query, k=1)

    if results:
        # Only the answer text of the single best match is spoken.
        _top_q, top_a, _dist = results[0]
        answer = top_a
    else:
        answer = "Sorry, I couldn't find a match."

    # Synthesize speech with gTTS. A fresh temp file per request avoids
    # concurrent Gradio requests clobbering one shared "answer.mp3".
    import tempfile
    tts = gTTS(text=answer, lang='en')
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        audio_path = handle.name
    tts.save(audio_path)

    return audio_path
 
66
# Wire the handler into a simple text-in / audio-out UI.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about your Samsung LED TV..."),
    outputs=gr.Audio(label=""),
    title="Samsung LED TV FAQ Assistant",
    description="Ask queries about your Samsung LED TV. The assistant will speak the answer.",
    theme="soft"
)

# NOTE(review): share=True requests a public gradio.live tunnel; on Hugging
# Face Spaces this is unnecessary (and ignored) — confirm the deployment
# target before keeping it.
demo.launch(share=True)