Vizznu19 committed on
Commit
735829b
·
verified ·
1 Parent(s): f505e41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -24
app.py CHANGED
@@ -22,33 +22,44 @@ app.add_middleware(
22
  # Mount static files for Hugging Face Spaces
23
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
24
 
25
- # Load data
26
- faq_df = pd.read_csv("BankFAQs.csv", usecols=["Question", "Answer"])
27
- questions = faq_df["Question"].astype(str).tolist()
28
- answers = faq_df["Answer"].astype(str).tolist()
 
29
 
30
- # Chunking function: split text into sentences
31
- sentence_splitter = re.compile(r'(?<=[.!?]) +')
32
- def chunk_text(text):
33
- return [chunk.strip() for chunk in sentence_splitter.split(text) if chunk.strip()]
 
 
 
 
 
34
 
35
- # Prepare chunked data
36
- chunked_questions = [] # Parent question for each chunk
37
- chunks = [] # The actual chunk text
38
- chunked_answers = [] # Full answer for reference
39
- for q, a in zip(questions, answers):
40
- answer_chunks = chunk_text(a)
41
- for chunk in answer_chunks:
42
- chunked_questions.append(q)
43
- chunks.append(chunk)
44
- chunked_answers.append(a)
45
 
46
- # Load model and build index
47
- model = SentenceTransformer("all-MiniLM-L6-v2")
48
- chunk_embeddings = model.encode(chunks)
49
- chunk_embeddings = np.array(chunk_embeddings).astype("float32")
50
- chunk_index = faiss.IndexFlatL2(chunk_embeddings.shape[1])
51
- chunk_index.add(chunk_embeddings)
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  class QueryRequest(BaseModel):
54
  query: str
@@ -56,6 +67,9 @@ class QueryRequest(BaseModel):
56
 
57
  @app.post("/search")
58
  async def search_faq(req: QueryRequest):
 
 
 
59
  query_embedding = model.encode([req.query]).astype("float32")
60
  D, I = chunk_index.search(query_embedding, req.k)
61
  # Calculate cosine similarity from L2 distance
 
22
  # Mount static files for Hugging Face Spaces
23
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
24
 
25
# Global variables for lazy initialization — populated by initialize_model()
# on the first /search request so the Space starts without loading the model.
model = None
chunk_index = None
chunked_questions = None
chunked_answers = None

# Sentence boundary: split after '.', '!' or '?' followed by one or more spaces.
_SENTENCE_SPLITTER = re.compile(r'(?<=[.!?]) +')


def _chunk_text(text):
    """Split *text* into sentence-level chunks, dropping empty pieces."""
    return [chunk.strip() for chunk in _SENTENCE_SPLITTER.split(text) if chunk.strip()]


def initialize_model():
    """Initialize the embedding model and FAISS index on first use.

    Loads BankFAQs.csv, splits every answer into sentence chunks, embeds the
    chunks with all-MiniLM-L6-v2, and builds an L2 FAISS index over them.

    The globals are assigned only after every step has succeeded: the original
    version set ``model`` before encoding/indexing, so an exception during
    embedding or index construction left ``model`` non-None with
    ``chunk_index`` still None — every later call then skipped initialization
    and /search failed permanently. With publish-last semantics a failed
    attempt leaves ``model`` None and the next request simply retries.
    """
    global model, chunk_index, chunked_questions, chunked_answers

    if model is not None:
        return  # Already initialized.

    # Load data.
    faq_df = pd.read_csv("BankFAQs.csv", usecols=["Question", "Answer"])
    questions = faq_df["Question"].astype(str).tolist()
    answers = faq_df["Answer"].astype(str).tolist()

    # Prepare chunked data: one entry per answer sentence, keeping the parent
    # question and the full answer alongside each chunk for later lookup.
    parent_questions = []  # Parent question for each chunk
    chunks = []            # The actual chunk text
    full_answers = []      # Full answer for reference
    for q, a in zip(questions, answers):
        for chunk in _chunk_text(a):
            parent_questions.append(q)
            chunks.append(chunk)
            full_answers.append(a)

    # Load model and build index — all in locals first.
    new_model = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = np.asarray(new_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish only after everything above succeeded; ``model`` last, since it
    # is the guard checked at the top of this function.
    chunked_questions = parent_questions
    chunked_answers = full_answers
    chunk_index = index
    model = new_model
63
 
64
  class QueryRequest(BaseModel):
65
  query: str
 
67
 
68
  @app.post("/search")
69
  async def search_faq(req: QueryRequest):
70
+ # Initialize model on first request
71
+ initialize_model()
72
+
73
  query_embedding = model.encode([req.query]).astype("float32")
74
  D, I = chunk_index.search(query_embedding, req.k)
75
  # Calculate cosine similarity from L2 distance