File size: 3,145 Bytes
235b1ba
1141e72
6a07e12
 
3b2955b
6a07e12
ef74b61
41bc4c0
5365751
fef5776
235b1ba
5365751
 
 
 
 
 
 
 
 
 
 
7a49b3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365751
d988bde
 
6099d18
522c4c0
d988bde
 
 
 
 
 
 
 
f4f569e
d988bde
0d31859
 
d988bde
 
 
 
 
 
44dee58
f715e9b
 
 
924fdc4
 
f715e9b
baf17f8
0314f35
baf17f8
d3888c8
3fcd81d
f715e9b
 
 
 
7b718cf
4ea7bf8
762a536
4ea7bf8
762a536
bcfe3e4
22c87f5
894f942
 
 
 
 
 
 
 
22c87f5
6ec3458
 
d988bde
 
235b1ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import numpy as np

# ---- Knowledge base loading & chunking -------------------------------------
# Read the plain-text knowledge base; it is chunked one-chunk-per-line and
# embedded once at startup so queries only need a single encode at request time.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_base = f.read()

print("Knowledge base loaded.")

cleaned_text = knowledge_base.strip()

# One raw chunk per line of the source file.
chunks = cleaned_text.split("\n")

# Keep only non-empty lines, stripped of surrounding whitespace
# (comprehension replaces the original manual append loop).
cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
print(cleaned_chunks)

# Sentence-embedding model used for both chunk and query embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
print(chunk_embeddings)

def get_top_chunks(query, k=3):
    """Return the `k` knowledge-base chunks most similar to `query`.

    Args:
        query: Free-text question to match against the knowledge base.
        k: Number of chunks to return (default 3, clamped to corpus size).

    Returns:
        List of chunk strings ranked by cosine similarity, best first.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Normalize both sides so the dot product below is cosine similarity.
    query_normalized = query_embedding / query_embedding.norm()
    chunks_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # (num_chunks, dim) @ (dim,) -> (num_chunks,) similarity scores.
    similarities = torch.matmul(chunks_normalized, query_normalized)

    # Clamp k so topk never asks for more results than chunks exist.
    k = min(k, len(cleaned_chunks))
    top_indices = torch.topk(similarities, k=k).indices

    # BUG FIX: similarities were computed over `cleaned_chunks`, but the
    # original indexed `chunks` (which still contains blank/unstripped
    # lines), so results could be misaligned or empty strings.
    return [cleaned_chunks[i] for i in top_indices]

# Hugging Face hosted-inference client; completions are streamed from this model.
client = InferenceClient("google/gemma-3-27b-it")

def respond(message, history):
    """Stream a chat completion for `message`, yielding partial replies.

    Args:
        message: The user's latest message text.
        history: Prior turns in Gradio "messages" format (list of
            {"role", "content"} dicts), or None/empty on a fresh chat.

    Yields:
        The accumulated response text after each streamed token, so the
        Gradio UI can render the reply incrementally.
    """
    # NOTE(review): get_top_chunks() is never called here, so retrieved
    # knowledge-base chunks are not injected into the prompt — confirm
    # whether RAG grounding was intended for this chatbot.
    messages = [{
        "role": "system",
        # Typos fixed: "appropiate" -> "appropriate", "explainations" -> "explanations".
        "content": "Your name is BloomBot and you're a supportive and helpful chatbot catered towards teens ages 10-18. You give clear kid-appropriate explanations and keep your explanations to 10 sentences maximum."
    }]
    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    response = ""
    # Distinct loop name: the original reused `message`, shadowing the parameter.
    for event in client.chat_completion(
        messages,
        max_tokens=500,
        stream=True,
        top_p=.2
    ):
        token = event.choices[0].delta.content
        # The final streamed delta may carry content=None; guard so we
        # don't raise TypeError on `response += None`.
        if token:
            response += token
            yield response

    print(response)
    
# Custom Gradio theme: Ocean base restyled with pink/fuchsia hues for BloomBot.
theme = gr.themes.Ocean(
    primary_hue="pink",
    secondary_hue="pink",
    neutral_hue="fuchsia"
    )

def display_image():
    """Return the file path of the BloomBot banner image shown in the UI."""
    banner_path = "BloomBotB (1).png"
    return banner_path

# ---- Gradio UI -------------------------------------------------------------
# Banner image + streaming chat interface, plus a "Resources" tab with a
# link-out button. Dead commented-out code removed; the garbled emoji in the
# HTML button ("๐Ÿ“„", mojibake) is restored to the intended 📄.
with gr.Blocks(theme=theme) as chatbot:
    gr.Image(display_image())
    gr.ChatInterface(
        respond,
        type="messages",
        title="Hi, I'm BloomBot!",
        examples=["What are the different types of period products? ",
                  "What are some vitamins that are good for teenage girls?",
                  "What should I know about puberty?"],
    )
    with gr.Tab("Resources"):
        gr.Markdown("### Resources")
        gr.HTML("""
        <a href="https://drive.google.com/file/d/1_KNELAUDLLidwAT3fs2JBuO1yPgMGoDv/view" target="_blank">
            <button style="font-size:16px;padding:10px 20px;margin-top:10px;">
                📄 Period Tracker
            </button>
        </a>
    """)

# debug=True surfaces server-side errors in the console while developing.
chatbot.launch(debug=True)