Spaces:

amburp
/

GreenGuidev2

Sleeping

File size: 5,979 Bytes

# !pip install -q sentence-transformers
from sentence_transformers import SentenceTransformer
import torch
import gradio as gr

# Read the whole knowledge-base file into memory; it is split into chunks further down.
with open("information.txt", "r", encoding="utf-8") as file:
    info_text = file.read()

# Debug aid: print the first 200 characters of the loaded text.
print("Raw text preview:", info_text[:200])

# Split the raw text into cleaned, non-empty chunks (one chunk per line).
def preprocess_text(text):
    """Split ``text`` on newlines and return the non-blank lines, stripped.

    Args:
        text: Raw text in which each line is treated as one chunk.

    Returns:
        A list of lines with surrounding whitespace removed. Lines that are
        empty or contain only whitespace are left out.
    """
    # Strip lazily so each line is stripped exactly once before filtering.
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line]
# Build the list of searchable chunks from the loaded text.
cleaned_chunks = preprocess_text(info_text)

# Stop at startup if the source file produced nothing to search.
if not cleaned_chunks:
    raise ValueError("No valid text chunks found in information.txt")

print("Chunks:", cleaned_chunks)
print("Number of chunks:", len(cleaned_chunks))

# Load the pre-trained embedding model that converts text to vectors.
# This one module-level instance is shared by create_embeddings and get_top_chunks.
model = SentenceTransformer('all-MiniLM-L6-v2')

def create_embeddings(text_chunks):
    """Encode ``text_chunks`` with the shared embedding model.

    Args:
        text_chunks: Text passed straight through to ``model.encode``.

    Returns:
        The encoded tensor. When the encoder returns a 1-D tensor, a leading
        dimension is added so the result has at least two dimensions.
    """
    encoded = model.encode(text_chunks, convert_to_tensor=True)
    # The similarity code normalizes along dim=1, so a lone vector is
    # promoted to a one-row matrix.
    return encoded.unsqueeze(0) if encoded.dim() == 1 else encoded

# Embed every chunk once at module load; query_model passes this tensor to get_top_chunks.
chunk_embeddings = create_embeddings(cleaned_chunks)

# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the chunks of ``text_chunks`` most similar to ``query``.

    Similarity is the cosine similarity between the query embedding and each
    row of ``chunk_embeddings``.

    Args:
        query: The question text to embed with the shared ``model``.
        chunk_embeddings: 2-D tensor whose rows are indexed in parallel with
            ``text_chunks``.
        text_chunks: The chunk strings the embedding rows were built from.
        top_k: Maximum number of chunks to return. Defaults to 3.

    Returns:
        A list of at most ``top_k`` chunks, in the order ``torch.topk``
        yields them (highest similarity first).
    """
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Scale both sides to unit length so the dot product is the cosine similarity.
    query_unit = query_embedding / query_embedding.norm()
    chunk_units = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunk_units, query_unit)

    # torch.topk raises when k exceeds the number of scores, so bound k by
    # both the score count and the chunk list that will be indexed.
    k = max(0, min(top_k, similarities.shape[0], len(text_chunks)))
    if k == 0:
        return []

    best_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in best_indices.tolist()]

def query_model(question):
    """Answer a user question with the most relevant knowledge-base chunks.

    Args:
        question: The text typed by the user. ``None``, a non-string value,
            or a blank string is treated as "no question asked".

    Returns:
        The welcome prompt when no question was asked; otherwise a numbered
        list of the best-matching chunks, or an ``"Error: ..."`` string if
        retrieval raised.
    """
    if not isinstance(question, str) or not question.strip():
        return "Welcome to GreenGuide! Ask me anything about eco-friendly hotels, restaurants, and things to do in NYC."

    # This function is the UI callback boundary: report a retrieval failure in
    # the answer box instead of letting the exception escape the click handler.
    try:
        top_chunks = get_top_chunks(question, chunk_embeddings, cleaned_chunks)
    except Exception as e:
        print("ERROR:", e)
        return f"Error: {str(e)}"

    numbered = "".join(
        f"{i}. {chunk}\n\n" for i, chunk in enumerate(top_chunks, start=1)
    )
    return "Here are the most relevant results:\n\n" + numbered


def display_iframe():
    """Return the module-level ``iframe`` HTML string."""
    return iframe
def display_image():
    """Return the URL of the hosted GIF image."""
    # Adjacent literals are joined at compile time; split only for readability.
    gif_url = (
        "https://i.giphy.com/media/"
        "v1.Y2lkPTc5MGI3NjExZzdqMnkzcWpjbGhmM3hzcXp0MGpuaTF5djR4bjBxM3Biam5zbzNnMCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9cw"
        "/GxMnTi3hV3qaIgbgQL/giphy.gif"
    )
    return gif_url
# Custom Gradio theme: the Monochrome base theme with explicit color overrides.
theme = gr.themes.Monochrome(
    primary_hue="amber",  # NOTE (original author): this hue setting did not have the intended effect
    secondary_hue="rose",
).set(
    background_fill_primary='#CBE9A2',  # page background
    background_fill_primary_dark='#768550',
    background_fill_secondary='#768550',  # button hover (per original author's note)
    background_fill_secondary_dark='#99a381',  # loading bar (per original author's note)
    border_color_accent='#768550',
    border_color_accent_dark='#768550',
    border_color_accent_subdued='#768550',
    border_color_primary='#03a9f4',
    block_border_color='#b3e5fc',
    button_primary_background_fill='#768550',
    button_primary_background_fill_dark='#768550'
)

# HTML snippet embedding a Google Maps view of New York, NY; rendered with gr.HTML in the layout.
iframe = '''
<iframe src="https://www.google.com/maps/embed?pb=!1m18!1m12!1m3!1d193595.2528001417!2d-74.1444872802558!3d40.69763123330436!2m3!1f0!2f0!3f0!3m2!1i1024!2i768!4f13.1!3m3!1m2!1s0x89c24fa5d33f083b%3A0xc80b8f06e177fe62!2sNew%20York%2C%20NY!5e0!3m2!1sen!2sus!4v1722483445443!5m2!1sen!2sus" width="600" height="450" style="border:0;" allowfullscreen="" loading="lazy" referrerpolicy="no-referrer-when-downgrade"></iframe>
'''


# Markdown welcome text rendered with gr.Markdown in the interface.
welcome_message = """
# 🌱 Welcome to GreenGuide!
## Your AI-driven assistant for all eco-friendly travel-related queries in NYC. Created by Eva, Amy, and Ambur of the 2024 Kode With Klossy NYC AI/ML Camp. 
## ... and updated by Ambur for the virtual 2026 KWK AI/ML IA Camp!
### wow, what a lot of acronyms!
"""

# Markdown list of the topics GreenGuide covers; rendered with gr.Markdown in the interface.
topics = """
### Feel free to ask me anything about things to do in the city!
- Hotels (affordable, luxury)
- Restaurants (regular, vegetarian, vegan)
- Parks & Gardens
- Thrift Stores
- Attractions
"""

# Set up the Gradio Blocks interface: header image, welcome text, topic list,
# question/answer form, and an embedded map.
with gr.Blocks(theme=theme) as demo:
    gr.Image("header2.png")  # presumably the header banner; original author flagged this image to be changed
    gr.Markdown(welcome_message)  # Display the formatted welcome message
    with gr.Row():
        with gr.Column():
            gr.Markdown(topics)  # Show the list of supported topics
    with gr.Row():
        with gr.Column():
            question = gr.Textbox(label="Your question", placeholder="What do you want to ask about?")
            answer = gr.Textbox(label="GreenGuide Response", placeholder="GreenGuide will respond here...", interactive=False, lines=10)
            submit_button = gr.Button("Submit")
            # Clicking Submit calls query_model with the question text and writes its return value to the answer box.
            submit_button.click(fn=query_model, inputs=question, outputs=answer)

    # Render the map iframe markup after the question/answer row.
    gr.HTML(iframe)


# Launch the Gradio app to allow user interaction
demo.launch()