Spaces:
Sleeping
Sleeping
File size: 4,716 Bytes
0471526 000d536 ecb7ce9 000d536 9583212 000d536 9583212 000d536 9583212 000d536 dd96dad 000d536 dd96dad dc20447 dd96dad dc20447 dd96dad 0f1691c 000d536 53e1500 c07c85f 18d69a1 000d536 75041b5 c2a4832 0471526 ec6911d d1c28e7 e1277c9 864ade5 e1277c9 aa62262 8bc2f4d aa62262 e1277c9 361afb6 a04fbb8 361afb6 a04fbb8 e1277c9 0471526 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
import random
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
# Load the whole travel knowledge base into memory once at start-up. UTF-8 is
# requested explicitly so decoding does not depend on the host's default.
with open("travel_info.txt", mode="r", encoding="utf-8") as travel_file:
    travel_text = travel_file.read()
def preprocess_text(text):
    """Split raw text into a list of cleaned, non-empty line chunks.

    Args:
        text: The raw document text. Each line is treated as one chunk.

    Returns:
        A list of the non-blank lines of ``text`` in their original order,
        each stripped of leading and trailing whitespace. Returns an empty
        list when ``text`` contains no non-whitespace characters.
    """
    # Strip the whole text first, then split it into one candidate per line.
    lines = text.strip().split("\n")

    # Blank separator lines would otherwise become empty-string chunks that
    # get embedded and can be handed back as "relevant" context, so only
    # chunks that still have content after stripping are kept.
    stripped_lines = (line.strip() for line in lines)
    cleaned_chunks = [chunk for chunk in stripped_lines if chunk]

    # Debug output: the chunks and how many there are.
    print(cleaned_chunks)
    print(len(cleaned_chunks))

    return cleaned_chunks
# Break the loaded travel text into the chunks that will be embedded and searched.
cleaned_chunks = preprocess_text(text=travel_text)
# Sentence-embedding model shared by the chunk encoder and the query encoder.
model = SentenceTransformer("all-MiniLM-L6-v2")
def create_embeddings(text_chunks):
    """Encode text chunks with the module-level sentence-embedding model.

    Args:
        text_chunks: The chunks to encode.

    Returns:
        The tensor produced by ``model.encode(..., convert_to_tensor=True)``.
    """
    embeddings = model.encode(text_chunks, convert_to_tensor=True)
    # Debug output: the embeddings themselves, then their shape.
    for debug_value in (embeddings, embeddings.shape):
        print(debug_value)
    return embeddings
# Embed every chunk once at start-up so each query only needs to encode itself.
chunk_embeddings = create_embeddings(text_chunks=cleaned_chunks)
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the text chunks most similar to a query by cosine similarity.

    Args:
        query: The query text to search for.
        chunk_embeddings: 2-D tensor of chunk embeddings, one row per chunk.
            Rows are assumed to line up with ``text_chunks`` by index.
        text_chunks: The chunk texts to select from.
        top_k: Maximum number of chunks to return. Defaults to 3.

    Returns:
        A list of up to ``top_k`` chunks, ordered from most to least similar.
        Empty when there are no chunks or ``top_k`` is not positive.
    """
    # Nothing to rank: return early instead of failing on an empty tensor.
    if not text_chunks or top_k <= 0:
        return []

    # Convert the query text into a vector embedding.
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Scale everything to unit length so the dot product below is the cosine
    # similarity. F.normalize clamps the divisor, so a zero vector gives zeros
    # rather than NaN.
    query_embedding_normalized = torch.nn.functional.normalize(query_embedding, dim=0)
    chunk_embeddings_normalized = torch.nn.functional.normalize(chunk_embeddings, dim=1)

    # One matrix-vector product gives the similarity of every chunk to the query.
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    print(similarities)

    # torch.topk raises if k exceeds the number of scores, so clamp it.
    k = min(top_k, similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices
    print(top_indices)

    # Convert to plain ints before indexing the Python list.
    return [text_chunks[index] for index in top_indices.tolist()]
# Hosted chat model that generates the replies.
client = InferenceClient(model="google/gemma-3-27b-it")
def respond(message, history):
    """Answer a chat message using retrieved travel context.

    Args:
        message: The user's latest message.
        history: Earlier conversation turns as dicts with ``"role"`` and
            ``"content"`` keys, or an empty/None value when there are none.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string if the response carries no text content.
    """
    information = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    # Join the chunks as plain text. Interpolating the list directly would put
    # its Python repr (brackets, quotes, escapes) into the prompt.
    context = "\n".join(information)
    messages = [{"role": "system", "content": f"You are a friendly and informative chatbot. You answer in full sentences and do not repeat yourself. Be concise and limit your responses to 4 sentences. You base your response on the following information: {context}"}]

    # Forward only the role and content of each earlier turn.
    # NOTE(review): the UI layer may attach extra per-message keys (e.g.
    # metadata) that the chat API does not expect - confirm against the
    # installed Gradio version.
    for turn in history or []:
        messages.append({"role": turn["role"], "content": turn["content"]})

    messages.append({"role": "user", "content": message})
    response = client.chat_completion(messages, max_tokens=150)

    # Guard against a missing content field so .strip() cannot fail on None.
    content = response["choices"][0]["message"]["content"]
    return (content or "").strip()
# Intro text shown next to the chat; it mixes Markdown with inline HTML.
description = "GoGreen is here to help you make your travel experience more kind to the Earth. Whether or not you already have a destination in mind, GoGreen can help you plan! From popular spots to transportation needs, GoGreen has you covered. <br> To get started, ask a question: **<ul> <li> Where should I go travel? </li> <li> What fun activities are there in New York? </li> <li> How should I move around New England? </li></ul>**"

# Page layout: a banner row on top, then a row with the intro/language column
# on the left and the tabbed chat column on the right.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="pink",
        secondary_hue="lime",
        neutral_hue="lime",
        text_size=gr.themes.sizes.text_lg,
    )
) as demo:
    with gr.Row():
        gr.Image("banner.png")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(description)
            # This dropdown is displayed only; nothing here reads its value.
            gr.Dropdown(
                ["English", "Spanish", "Mandarin", "French", "Korean"],
                label="Language",
                interactive=True,
            )
        with gr.Column(scale=2):
            # Every tab hosts its own chat interface backed by the same respond().
            for tab_title in ("US 🇺🇸", "Europe 🥖", "China 🇨🇳"):
                with gr.Tab(tab_title):
                    gr.ChatInterface(respond, type="messages")

# Start the web app.
demo.launch()
|