File size: 2,747 Bytes
8780f91
 
9df23c7
8780f91
 
9df23c7
d9a50df
9df23c7
 
 
41a4817
9df23c7
 
41a4817
9df23c7
41a4817
d6bd421
9df23c7
484a212
41a4817
83fd6cd
41a4817
 
 
d6bd421
83fd6cd
782de8f
9df23c7
9aeb280
d6bd421
83fd6cd
41a4817
9df23c7
 
 
 
 
 
 
 
 
 
 
 
f66a1c2
d6bd421
9df23c7
 
 
 
 
 
 
 
 
 
8780f91
9df23c7
1ea629b
8780f91
 
9df23c7
 
8780f91
9df23c7
416a6d3
8780f91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import random
import torch
from huggingface_hub import InferenceClient

# Hosted inference client for the Mistral-7B instruct chat model.
# NOTE(review): no token is passed here, so this relies on ambient HF auth
# (HF_TOKEN env var or a cached `huggingface-cli login`) — replace with
# explicit token loading before deploying, as the original comment intended.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")

# Install sentence-transformers before running if not already installed
# !pip install -q sentence-transformers
from sentence_transformers import SentenceTransformer

# Load knowledge base: read the raw text, split it into chunks, and embed
# each chunk once at startup so retrieval is a cheap similarity lookup later.
with open("knowledge.txt", "r", encoding="utf-8") as file:
    exp_know_text = file.read()
print(exp_know_text)

cleaned_text = exp_know_text.strip()
# Chunk on newlines. Other options: split on "." to chunk by sentence,
# or on " " to chunk by word.
# Fix: strip each chunk and drop empty lines *before* embedding, so that
# `chunks`, `chunk_embeddings`, and the indices returned by topk all refer
# to the same cleaned list (previously the cleaned list was computed but
# the raw, possibly-empty chunks were embedded).
chunks = [chunk.strip() for chunk in cleaned_text.split("\n") if chunk.strip()]
cleaned_chunks = chunks  # kept for backward compatibility; now identical to `chunks`
print(cleaned_chunks)

# Embed every chunk as a tensor so cosine similarity can run on-device.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(chunks, convert_to_tensor=True)
print(chunk_embeddings)

# Similarity function (Matt's is different and combined with above)
def get_top_chunks(query, k=3):
    """Return the k knowledge chunks most similar to *query*.

    Embeds the query, L2-normalizes both the query and the precomputed
    chunk embeddings, and ranks chunks by cosine similarity (dot product
    of normalized vectors).

    Args:
        query: The user's question (a string).
        k: Maximum number of chunks to return (default 3, matching the
           original hard-coded behavior).

    Returns:
        A list of up to k chunk strings, most similar first.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Normalize so the matmul below computes cosine similarity.
    query_normalized = query_embedding / query_embedding.norm()
    chunks_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_normalized, query_normalized)
    # Guard against a knowledge base smaller than k — topk raises otherwise.
    top_k = min(k, similarities.size(0))
    top_indices = torch.topk(similarities, k=top_k).indices.tolist()
    return [chunks[i] for i in top_indices]

# Chatbot response
def respond(message, history):
    messages = [{"role": "system", "content": "You are a friendly chatbot. You help people understand cognitive biases using simple language."}]
    
    if history:
        for human, ai in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": ai})
    
    # Add top knowledge chunks
    top_chunks = get_top_chunks(message)
    context = "\n".join(top_chunks)
    messages.append({"role": "user", "content": f"{context}\n{message}"})

    response = client.chat_completion(
        messages=messages,
        max_tokens=200,
        temperature=0.2
    )
    
    return response.choices[0].message.content.strip()

# Launch UI — one keyword argument per line for readability; the call is
# otherwise identical to the original.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=gr.Chatbot(),
    title="Let's Chat about Cognitive Biases!",
    description="Do you ever wonder how people can use shortcuts to make decisions, and how those shortcuts can bias our decision-making processes? This chatbot will engage you in learning about the different decision biases",
    theme="default",
)
demo.launch()