vivianoh committed on
Commit
e6cc2e1
·
verified ·
1 Parent(s): e20ac9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -1
app.py CHANGED
@@ -2,9 +2,60 @@ import gradio as gr
2
  import random
3
  from huggingface_hub import InferenceClient
4
  # import lines go at the top: any libraries I need to import go up here ^^
5
-
 
6
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def respond(message, history):
9
 
10
  messages = [{"role": "system", "content": "You are a friendly chatbot."}]
@@ -27,4 +78,7 @@ def yes_or_no(message, history):
27
  chatbot = gr.ChatInterface(respond, type = "messages")
28
  # defining my chatbot so that the user can interact and see their conversation history and send new messages
29
 
 
 
 
30
  chatbot.launch()
 
2
  import random
3
  from huggingface_hub import InferenceClient
4
  # import lines go at the top: any libraries I need to import go up here ^^
5
+ from sentence_transformers import SentenceTransformer
6
+ import torch
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
+ # Step 1: Load the knowledge base
10
+ with open("Untitled document.txt", "r", encoding="utf-8") as f:
11
+ skincare_text = f.read()
12
+
13
+ # Step 2: Preprocess text into sentence chunks
14
def preprocess_text(text):
    """Split raw text into trimmed, non-empty chunks separated by periods.

    Args:
        text: The full knowledge-base text as a single string.

    Returns:
        A list of chunk strings with surrounding whitespace removed. Pieces
        that are empty after trimming (for example, between two consecutive
        periods) are dropped.

    Also prints the first three chunks and the total chunk count.
    """
    pieces = []
    for fragment in text.strip().split("."):
        trimmed = fragment.strip()
        if trimmed:
            pieces.append(trimmed)

    # Quick visual check of how the text was split.
    print(f"Sample chunks: {pieces[:3]}")
    print(f"There are {len(pieces)} chunks.")
    return pieces
21
+
22
+ cleaned_chunks = preprocess_text(skincare_text)
23
+
24
+ # Step 3: Convert chunks into embeddings
25
+ from sentence_transformers import SentenceTransformer
26
+ import torch
27
+
28
+ model = SentenceTransformer('all-MiniLM-L6-v2')
29
+
30
def create_embeddings(text_chunks):
    """Encode text chunks into a tensor of embeddings.

    Uses the module-level ``model`` (a SentenceTransformer) and prints the
    shape of the resulting tensor.

    Args:
        text_chunks: The chunk strings to embed.

    Returns:
        The tensor returned by ``model.encode(..., convert_to_tensor=True)``.
    """
    embeddings = model.encode(text_chunks, convert_to_tensor=True)
    shape = embeddings.shape
    print(f"Embeddings shape: {shape}")
    return embeddings
34
+
35
+ chunk_embeddings = create_embeddings(cleaned_chunks)
36
+
37
+ # Step 4: Retrieve top matching chunks
38
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the chunks most similar to ``query`` by cosine similarity.

    Args:
        query: Text to search for; encoded with the module-level ``model``.
        chunk_embeddings: 2-D tensor with one embedding row per entry of
            ``text_chunks``.
        text_chunks: Chunk strings, in the same order as the embedding rows.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, best match first. Fewer are returned when
        there are fewer than ``top_k`` chunks, and an empty list is returned
        when there are no chunks or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of scores, so cap k at the
    # number of chunks that actually exist.
    k = min(top_k, len(text_chunks))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)

    # Scale both sides to unit length so the dot product below is the cosine
    # similarity between the query and each chunk.
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)

    top_indices = torch.topk(similarities, k=k).indices
    # .tolist() gives plain Python ints for indexing the chunk list.
    return [text_chunks[i] for i in top_indices.tolist()]
45
+
46
+ # Step 5: Test the workflow with sample queries
47
# Sample queries used to check the retrieval pipeline when the module loads.
queries = [
    "Consistent skincare routine",
    "Applying sunscreen daily",
    "Choosing products that match your skin type",
]

# For each query, print it followed by its best-matching chunks, numbered from 1.
for q in queries:
    print(f"\nQuery: {q}")
    results = get_top_chunks(q, chunk_embeddings, cleaned_chunks)
    for rank, chunk in enumerate(results, start=1):
        print(f"Result {rank}: {chunk}")
58
+
59
  def respond(message, history):
60
 
61
  messages = [{"role": "system", "content": "You are a friendly chatbot."}]
 
78
  chatbot = gr.ChatInterface(respond, type = "messages")
79
  # defining my chatbot so that the user can interact and see their conversation history and send new messages
80
 
81
# Startup check of the retrieval step using a fixed sample question.
# No module-level `question` is defined in the visible code, so referencing
# that name here would raise NameError before the app launches.
# NOTE(review): to ground the chatbot's answers, retrieval presumably belongs
# inside respond(), called with the user's message - confirm and move it there.
sample_question = "Applying sunscreen daily"
top_results = get_top_chunks(sample_question, chunk_embeddings, cleaned_chunks)
print(top_results)
83
+
84
  chatbot.launch()