lisaude0512 committed
Commit 3a34165 · verified · 1 Parent(s): 63de516

adding basic code

Files changed (1)
  1. app.py +67 -0
app.py CHANGED
@@ -4,6 +4,73 @@ import torch
  import numpy as np
  import random
  from huggingface_hub import InferenceClient
+
+ # LLM we are using
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", provider='hf-inference')

+ # load the text file
+ with open("be_a_better_you.txt", "r", encoding="utf-8") as file:
+     wellness_text = file.read()
+
+ # clean up the text and split it into chunks, one per line
+ cleaned_text = wellness_text.strip()
+ chunks = cleaned_text.split("\n")
+ cleaned_chunks = []
+
+ # keep only non-empty chunks, stripped of surrounding whitespace
+ for chunk in chunks:
+     stripped_chunk = chunk.strip()
+     if stripped_chunk:
+         cleaned_chunks.append(stripped_chunk)
+
+ # model for embeddings
  model = SentenceTransformer('all-MiniLM-L6-v2')
+
+ chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
+
+ def get_top_chunks(query):
+     # encode the query into a vector embedding for comparison
+     query_embedding = model.encode(query, convert_to_tensor=True)
+     # normalize the query and chunk embeddings to unit length so the
+     # dot products below compare meaning (cosine similarity)
+     query_embedding_normalized = query_embedding / query_embedding.norm()
+     chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+     # matmul (matrix multiplication) compares the query to every chunk at once
+     similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
+     print(similarities)
+
+     # get the indices of the three chunks that are most similar to the query
+     top_indices = torch.topk(similarities, k=3).indices
+     print(top_indices)
+
+     top_chunks = []
+     # for each index in top_indices, look up the corresponding text
+     # (index into cleaned_chunks, the list the embeddings were built from)
+     for i in top_indices:
+         top_chunks.append(cleaned_chunks[i])
+
+     return top_chunks
+
+ def respond(message, history):
+     # retrieve the most relevant chunks and fold them into the system prompt
+     context = get_top_chunks(message)
+     messages = [{"role": "system", "content": "You are a big sister chatbot named Nessie. You help people feel better about their bodies and self-image. Use this context when answering:\n" + "\n".join(context)}]
+     if history:
+         messages.extend(history)
+     messages.append({"role": "user", "content": message})
+
+     # stream the completion back token by token
+     response = ""
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=500,
+         stream=True,
+     ):
+         token = chunk.choices[0].delta.content
+         if token:  # the final streamed chunk can have empty content
+             response += token
+         yield response
+
+ chatbot = gr.ChatInterface(respond, type="messages")
+ chatbot.launch(debug=True)
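
As a sanity check on the retrieval step: normalizing the embeddings to unit length and then taking a matrix-vector product, as get_top_chunks does above, is exactly cosine similarity. A minimal sketch, using random stand-in tensors in place of the real all-MiniLM-L6-v2 embeddings:

import torch

# random stand-ins for the real embeddings (384 is all-MiniLM-L6-v2's dimension)
chunk_embeddings = torch.randn(5, 384)
query_embedding = torch.randn(384)

# the diff's approach: normalize to unit length, then one matrix-vector product
q = query_embedding / query_embedding.norm()
c = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
sims_matmul = torch.matmul(c, q)

# reference: torch's built-in cosine similarity, one score per chunk
sims_cosine = torch.nn.functional.cosine_similarity(
    chunk_embeddings, query_embedding.unsqueeze(0), dim=1
)

print(torch.allclose(sims_matmul, sims_cosine, atol=1e-6))  # True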