import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# This client handles sending chat-completion requests to the hosted model
client = InferenceClient("google/gemma-2-2b-it")
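# (Note: google/gemma-2-2b-it is a gated model on the Hub, so inference may
# require a token with access to it. If you hit auth errors, one option is to
# pass a token explicitly, e.g.
# InferenceClient("google/gemma-2-2b-it", token=os.environ["HF_TOKEN"]),
# where HF_TOKEN is an assumed secret/env-var name.)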

# Load and process the knowledge base text file
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()

# Split the text into chunks (here, one chunk per non-empty line)
chunks = [chunk.strip() for chunk in knowledge_text.split("\n") if chunk.strip()]

# Load an embedding model (this one is light and fast)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Precompute embeddings for all chunks (as a tensor for fast similarity search)
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
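# (Alternative sketch, if preferred: SentenceTransformer.encode also accepts
# normalize_embeddings=True, which returns unit-length vectors up front so the
# cosine-similarity step below reduces to a plain dot product.)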

def get_relevant_context(query, top_k=3):
    """
    Compute the embedding for the query, compare it against all chunk embeddings,
    and return the top_k most similar chunks concatenated into a context string.
    """

    # Compute and normalize the query embedding
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    query_embedding = query_embedding / query_embedding.norm()

    # Normalize chunk embeddings along the embedding dimension
    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and each chunk (dot product of unit vectors)
    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)

    # Get the indices of the top_k most similar chunks (capped at the number of chunks)
    top_k_indices = torch.topk(similarities, k=min(top_k, len(chunks))).indices.tolist()

    # Concatenate the top chunks into a single context string
    return "\n\n".join(chunks[i] for i in top_k_indices)
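# Example (hypothetical query): get_relevant_context("How do I soft boil an egg?")
# returns the 3 knowledge-base chunks most similar to the query, joined by
# blank lines, ready to drop into the prompt.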

def respond(message, history):

    system_message = (
        "You are a kitchen sous chef. You always respond with a knowledgeable and "
        "upbeat attitude! Do not answer questions unrelated to cooking. If the user "
        "asks an unrelated question, tell them you can only answer questions about "
        "cooking and do not answer their question."
    )

    # Retrieve the knowledge-base chunks most relevant to the user's message and
    # fold them into the system prompt so the model can ground its answer
    context = get_relevant_context(message)
    system_message += f"\n\nUse the following context when it is relevant:\n{context}"

    # Initialize a list of dictionaries to store the messages
    messages = [{"role": "system", "content": system_message}]

    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts; copy those two fields over
    for past in history or []:
        messages.append({"role": past["role"], "content": past["content"]})

    # Add the current user's message to the messages list
    messages.append({"role": "user", "content": message})

    # Make the chat completion API call, sending the messages and other
    # parameters to the model; stream=True yields one token at a time
    response = ""

    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=150,
            temperature=0.8,
            stream=True,
        ):
            # Capture the most recent token (the final chunk's delta can be empty)
            token = chunk.choices[0].delta.content
            if token:
                response += token
            yield response

    except Exception as e:
        print(f"An error occurred: {e}")

# Build the page: title and suggested topics beside the chat interface, disclaimer below
title = "# SousChef AI 🍳"
topics = """
### I'm here to help you level up in the kitchen!
If you're not sure where to start, try using one of the messages below:
- How should I cook steak if I want it medium-rare?
- What's a good recipe to make for a vegetarian potluck?
- I'm making cookies and don't have baking soda, help!
- How can I chop onions without crying?
- Give me some dishes that combine Indian and Mediterranean cuisine.
- How do I soft boil an egg?
"""
disclaimer = "SousChef AI is only designed to provide recommendations on how to prepare and cook different recipes. It is not responsible for any kitchen disasters or bad-tasting meals; that's all you, chef!"

with gr.Blocks(theme="shivi/calm_seafoam") as chatbot:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(title)
            gr.Markdown(topics)

        with gr.Column(scale=2):
            # The theme is set on the outer Blocks above; a theme passed to a
            # nested ChatInterface would be ignored
            gr.ChatInterface(
                respond,
                type="messages",
                examples=[
                    "What are some cooking tips for beginners?",
                    "What toppings should I add to my ramen?",
                    "What's the best recipe for homemade pizza dough?",
                ],
            )
    with gr.Row():
        gr.Markdown(disclaimer)

chatbot.launch(debug=True)
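# (Assumed setup: on a Hugging Face Space this script expects a requirements.txt
# listing gradio, huggingface_hub, sentence-transformers, and torch, plus the
# knowledge.txt file sitting alongside it.)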