Vizznu19 commited on
Commit
4b9b98e
·
verified ·
1 Parent(s): 31a207e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -76
app.py CHANGED
@@ -1,90 +1,75 @@
1
import pandas as pd
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from sentence_transformers import SentenceTransformer

import faiss
import numpy as np
import re

# FastAPI application exposing the semantic FAQ search endpoints below.
app = FastAPI(title="Bank FAQ Assistant", description="A semantic search FAQ system")

# Allow CORS for local frontend.
# NOTE(review): browsers reject credentialed requests when allow_origins is
# the wildcard "*" — if cookies/auth are actually needed, list explicit
# origins; confirm against the frontend's requirements.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables for lazy initialization: populated once by
# initialize_model() on the first /search request so startup stays fast.
model = None              # SentenceTransformer instance
chunk_index = None        # FAISS L2 index over answer-sentence embeddings
chunked_questions = None  # parent question for each indexed chunk
chunked_answers = None    # full parent answer for each indexed chunk
def initialize_model():
    """Lazily load the FAQ data, embedding model, and FAISS index.

    Runs the expensive setup only on the first call; subsequent calls are
    no-ops because the ``model`` global is already populated.
    """
    global model, chunk_index, chunked_questions, chunked_answers

    if model is not None:
        return

    # Load the FAQ dataset.
    frame = pd.read_csv("BankFAQs.csv", usecols=["Question", "Answer"])
    question_list = frame["Question"].astype(str).tolist()
    answer_list = frame["Answer"].astype(str).tolist()

    # Split text into sentence-sized chunks on terminal punctuation.
    splitter = re.compile(r'(?<=[.!?]) +')

    def split_sentences(text):
        return [piece.strip() for piece in splitter.split(text) if piece.strip()]

    # One entry per answer sentence, each remembering its parent Q and A.
    chunked_questions, chunks, chunked_answers = [], [], []
    for question, answer in zip(question_list, answer_list):
        for piece in split_sentences(answer):
            chunked_questions.append(question)
            chunks.append(piece)
            chunked_answers.append(answer)

    # Embed every chunk and build an exact L2 index over the vectors.
    model = SentenceTransformer("all-MiniLM-L6-v2")
    vectors = np.array(model.encode(chunks)).astype("float32")
    chunk_index = faiss.IndexFlatL2(vectors.shape[1])
    chunk_index.add(vectors)
class QueryRequest(BaseModel):
    """Request body for /search: a free-text query plus result count."""
    query: str  # user question to match against the FAQ chunks
    k: int = 1  # number of nearest chunks to retrieve from the index
@app.get("/")
def root():
    """Landing endpoint: confirms the service is up and points at /search."""
    message = "Bank FAQ Assistant is running. Use /search endpoint to query."
    return {"message": message}
@app.post("/search")
async def search_faq(req: QueryRequest):
    """Semantic search over the chunked FAQ answers.

    Retrieves the ``req.k`` nearest answer chunks for ``req.query`` and
    returns the parent question and full answer of each sufficiently
    similar hit. Multiple chunks of the same answer are collapsed into a
    single result.
    """
    # Build the model and index lazily on the first request.
    initialize_model()

    query_embedding = model.encode([req.query]).astype("float32")
    D, I = chunk_index.search(query_embedding, req.k)

    # Cosine similarity recovered from squared L2 distance:
    # cosine_sim = 1 - d^2 / 2.
    # NOTE(review): this identity only holds for unit-normalized vectors;
    # the encoder is not asked to normalize its output — confirm, or pass
    # normalize_embeddings=True when encoding.
    similarities = 1 - (D[0] / 2)
    threshold = 0.6

    results = []
    seen_questions = set()
    for idx, sim in zip(I[0], similarities):
        # FAISS pads with -1 when the index holds fewer than k vectors; the
        # old code let -1 through and silently returned the *last* chunk.
        if idx < 0 or sim < threshold:
            continue
        parent = chunked_questions[idx]
        if parent in seen_questions:
            continue  # several sentences of one answer: report it once
        seen_questions.add(parent)
        results.append({
            "question": parent,
            "full_answer": chunked_answers[idx]
        })
    return {"results": results}
@app.get("/health")
def health_check():
    """Liveness probe for deployment health checks."""
    payload = {"status": "healthy", "message": "FAQ Assistant is ready"}
    return payload
1
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load the FAQ dataset (expects "Question" and "Answer" columns).
df = pd.read_csv("samsung_led_tv_faq_500.csv")

# Load the pretrained embedding model once; it is reused for indexing and
# for encoding user queries later in this script.
model = SentenceTransformer('all-MiniLM-L6-v2')

# NOTE: a full embedding pass over the raw (still-duplicated) questions was
# previously computed here and immediately discarded after deduplication
# below; that wasted pass and the notebook-style bare expressions
# (df.head(), question_embeddings) have been removed. Embeddings are
# generated once, after deduplication.
import faiss
import numpy as np

# Drop exact duplicate questions so the index holds one vector per question.
df = df.drop_duplicates(subset='Question').reset_index(drop=True)
print(f"Total unique questions: {len(df)}")

# Embed the cleaned question list. The model loaded above is reused — the
# previous re-import and second SentenceTransformer(...) instantiation were
# redundant and reloaded the whole model from disk.
# FAISS requires contiguous float32 vectors, hence the explicit cast.
question_embeddings = model.encode(df['Question'].tolist(), show_progress_bar=True)
question_embeddings = np.array(question_embeddings).astype("float32")
# Build a FAISS index over the question embeddings (exact L2 search).
import faiss
embedding_dim = question_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(question_embeddings)
def search_faq(query, k=3):
    """Return the k nearest FAQ entries for a free-text query.

    Parameters:
        query: user question (str).
        k: number of neighbors to retrieve (default 3).

    Returns:
        List of (question, answer, distance) tuples, nearest first. May be
        shorter than k when the index holds fewer vectors.
    """
    query_embedding = model.encode([query]).astype("float32")
    D, I = index.search(query_embedding, k)
    results = []
    for dist, i in zip(D[0], I[0]):
        # FAISS pads missing neighbors with -1. The old `i < len(df)` guard
        # let -1 through, and df.iloc[-1] silently returned the *last* row
        # as a fake match — require a genuinely valid row index.
        if 0 <= i < len(df):
            # float(dist) converts numpy float32 to a plain Python float.
            results.append((df.iloc[i]['Question'], df.iloc[i]['Answer'], float(dist)))
    return results
# Quick smoke test of the retriever with a sample question.
query = "Can I mount the TV on a wall? (model UA48TU7069)"
results = search_faq(query)

print(f"Query: {query}\n")
# Show each hit with its L2 distance (smaller = closer).
for q, a, d in results:
    print(f"Matched Q: {q}\nAnswer: {a}\nDistance: {d:.4f}\n")
+ import gradio as gr
48
+ from gtts import gTTS
49
+ import os
50
 
51
def gradio_interface(query):
    """Gradio handler: answer a TV FAQ query and speak the answer aloud.

    Parameters:
        query: user question from the textbox (str).

    Returns:
        Path to an MP3 file containing the spoken answer, for gr.Audio.
    """
    results = search_faq(query, k=1)

    if results:
        # Only the answer text of the single best match is spoken.
        _top_q, top_a, _dist = results[0]
        answer = top_a
    else:
        answer = "Sorry, I couldn't find a match."

    # Synthesize speech with gTTS. A fresh temp file per request avoids
    # concurrent Gradio requests clobbering one shared "answer.mp3".
    import tempfile
    tts = gTTS(text=answer, lang='en')
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        audio_path = handle.name
    tts.save(audio_path)

    return audio_path
 
66
# Wire the handler into a simple text-in / audio-out UI.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about your Samsung LED TV..."),
    outputs=gr.Audio(label=""),
    title="Samsung LED TV FAQ Assistant",
    description="Ask queries about your Samsung LED TV. The assistant will speak the answer.",
    theme="soft"
)

# NOTE(review): share=True requests a public gradio.live tunnel; on Hugging
# Face Spaces this is unnecessary (and ignored) — confirm the deployment
# target before keeping it.
demo.launch(share=True)