# CapstoneProject / app.py
# polinapred's picture
# debugging respond function
# 5d7d7e8 verified
import gradio as gr
import os
from huggingface_hub import InferenceClient
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from bs4 import BeautifulSoup
# Access token read from the "HF_Token" environment variable
# (None when the variable is not set).
hf_token = os.environ.get("HF_Token")

# Client used by respond() to request chat completions.
client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=hf_token)

# Embedding model used to encode both the stored chunks and user queries.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
def preprocess_text(text):
    """Split ``text`` into trimmed, non-empty sentence-like chunks.

    The text is stripped, broken on newlines, and each line is further
    broken on the literal separator ``". "``. Every fragment is stripped
    of surrounding whitespace and empty fragments are discarded.

    Args:
        text: The raw string to split.

    Returns:
        A list of non-empty chunk strings in their original order.
    """
    fragments = (
        piece.strip()
        for line in text.strip().split("\n")
        for piece in line.split(". ")
    )
    # An empty string is falsy, so this keeps only fragments with content.
    return [fragment for fragment in fragments if fragment]
def prepare_docs(path="spots.json"):
    """Load the spots file and turn each entry into name-tagged text chunks.

    Each item of the JSON array must have a ``'popup'`` key holding an HTML
    fragment. The spot name is taken from the first element with class
    ``infobox-title`` (falling back to ``"Unknown Spot"``), the fragment's
    text is split with ``preprocess_text``, and every chunk is prefixed
    with ``"[<name>]: "``.

    Args:
        path: Location of the JSON file. Defaults to ``"spots.json"``.

    Returns:
        A flat list of ``"[name]: chunk"`` strings for all items.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an item has no ``'popup'`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default encoding.
    with open(path, "r", encoding="utf-8") as f:
        raw_data = json.load(f)

    all_processed_chunks = []
    for item in raw_data:
        soup = BeautifulSoup(item["popup"], "html.parser")

        # Look the title element up once and reuse the result.
        title_tag = soup.find(class_="infobox-title")
        name = title_tag.get_text() if title_tag is not None else "Unknown Spot"

        raw_html_text = soup.get_text(separator=" ")
        all_processed_chunks.extend(
            f"[{name}]: {chunk}" for chunk in preprocess_text(raw_html_text)
        )
    return all_processed_chunks
# Build the in-memory retrieval index once, at import time.
processed_data = prepare_docs()

# One embedding row per chunk in processed_data.
embeddings = embed_model.encode(processed_data)

# Flat L2 index sized to the embedding dimension; positions in the index
# line up with positions in processed_data.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.asarray(embeddings, dtype="float32"))
def retrieve(query, k=3):
    """Return up to ``k`` stored chunks nearest to ``query``.

    The query is embedded with ``embed_model`` and searched against the
    module-level FAISS ``index``; the matching entries of
    ``processed_data`` are returned in the order FAISS reports them.

    Args:
        query: The user's search text.
        k: Maximum number of chunks to return. Defaults to 3.

    Returns:
        A list of at most ``k`` chunk strings. Empty when ``k`` is not
        positive or the index holds no vectors.
    """
    # Never request more neighbours than the index contains.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    query_vec = np.asarray(embed_model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_vec, k)

    # FAISS marks missing results with the label -1. Used as a list index,
    # -1 would silently return the last chunk, so drop those labels.
    return [processed_data[i] for i in indices[0] if i >= 0]
def _history_to_messages(history):
    """Convert chat history entries into role/content message dicts."""
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Message-style entry: forward only the two fields the chat
            # API needs, dropping any extra UI-only keys.
            messages.append(
                {"role": entry["role"], "content": entry["content"]}
            )
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # Pair-style entry: (user_text, assistant_text). Either side
            # may be None when that turn has no text.
            user_text, assistant_text = entry
            if user_text is not None:
                messages.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                messages.append(
                    {"role": "assistant", "content": assistant_text}
                )
        else:
            # Unknown shape: pass through unchanged.
            messages.append(entry)
    return messages


def respond(message, history):
    """Stream a reply to ``message`` grounded in retrieved spot facts.

    The chunks returned by ``retrieve(message)`` are embedded in the
    system prompt, followed by the prior conversation and the new user
    message. The model's answer is streamed through ``client``.

    Args:
        message: The latest user message.
        history: Previous turns as supplied by the chat UI, either as
            dicts with ``role``/``content`` keys or as
            ``[user, assistant]`` pairs.

    Yields:
        The response text accumulated so far, once per processed stream
        chunk.
    """
    retrieved_info = retrieve(message)
    context = "\n- ".join(retrieved_info)

    system_prompt = (
        "You are 'CityScout', a friendly guide to unique hangout spots.\n"
        "Use the following verified facts from our database to help the user.\n"
        "Always mention the name of the spot found in the brackets [Like This].\n"
        "Database Facts:\n"
        f"- {context}\n"
        "If you find a match, describe it enthusiastically! "
        "If not, help them brainstorm based on their interests."
    )

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.7,
        top_p=0.9,
        stream=True,
    ):
        # Guard against a stream chunk whose choices list is empty, which
        # would otherwise raise IndexError on choices[0].
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response
# Chat UI backed by the streaming respond() generator.
chatbot = gr.ChatInterface(
    fn=respond,
    title="CityScout: Unique Spot Finder",
    description=(
        "Tell me your city or interests and I'll help you find cool places nearby!"
    ),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch()