"""CityScout: a Gradio RAG chatbot that recommends hangout spots.

Pipeline: load spot descriptions from spots.json -> chunk the text ->
embed chunks with SentenceTransformers -> index them in FAISS ->
at chat time, retrieve the top-k chunks for the user's message and
feed them as context to a hosted Qwen chat model.
"""

import gradio as gr
import os
from huggingface_hub import InferenceClient
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from bs4 import BeautifulSoup

# NOTE(review): env var name "HF_Token" has unusual casing — confirm it
# matches the deployment secret name (commonly "HF_TOKEN").
hf_token = os.getenv("HF_Token")
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=hf_token)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")


def preprocess_text(text):
    """Split raw text into small, non-empty chunks.

    Splits first on newlines, then on sentence boundaries (". "),
    strips whitespace, and drops empty fragments.

    Args:
        text: Raw text to chunk.

    Returns:
        list[str]: Cleaned, non-empty text chunks.
    """
    cleaned_text = text.strip()
    chunks = []
    for line in cleaned_text.split("\n"):
        chunks.extend(line.split(". "))
    # Keep only non-empty chunks after stripping whitespace.
    return [chunk.strip() for chunk in chunks if chunk.strip()]


def prepare_docs():
    """Load spots.json and return name-tagged text chunks for indexing.

    Each item in spots.json is expected to carry a 'popup' key holding
    HTML whose element with class 'infobox-title' names the spot.
    Every chunk is prefixed with "[<spot name>]: " so the LLM can cite
    the spot by name.

    Returns:
        list[str]: All processed chunks across all spots.
    """
    with open('spots.json', 'r', encoding='utf-8') as f:
        raw_data = json.load(f)

    all_processed_chunks = []
    for item in raw_data:
        soup = BeautifulSoup(item['popup'], 'html.parser')
        # Look up the title element once instead of twice.
        title_el = soup.find(class_='infobox-title')
        name = title_el.get_text() if title_el else "Unknown Spot"
        raw_html_text = soup.get_text(separator=" ")
        for chunk in preprocess_text(raw_html_text):
            all_processed_chunks.append(f"[{name}]: {chunk}")
    return all_processed_chunks


# Build the vector index once at startup (module import time).
processed_data = prepare_docs()
embeddings = embed_model.encode(processed_data)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings).astype('float32'))


def retrieve(query, k=3):
    """Return the k chunks whose embeddings are nearest to the query.

    Args:
        query: Free-text user query.
        k: Number of chunks to retrieve.

    Returns:
        list[str]: Matching entries from processed_data (may be fewer
        than k if the index holds fewer than k vectors).
    """
    query_vec = embed_model.encode([query])
    distances, indices = index.search(np.array(query_vec).astype('float32'), k)
    # FAISS pads results with -1 when k exceeds the number of indexed
    # vectors; without this guard, -1 would wrap to the last document.
    return [processed_data[i] for i in indices[0] if i >= 0]


def respond(message, history):
    """Stream a grounded chat reply for the Gradio ChatInterface.

    Retrieves context for the user's message, builds a system prompt
    containing the retrieved facts, and streams tokens from the hosted
    model, yielding the partial response as it grows.

    Args:
        message: Latest user message.
        history: Prior conversation turns. NOTE(review): appended to the
            messages list as-is, so this assumes gradio supplies
            OpenAI-style {"role", "content"} dicts — confirm the
            ChatInterface message format in the deployed gradio version.

    Yields:
        str: The accumulated assistant response so far.
    """
    retrieved_info = retrieve(message)
    context = "\n- ".join(retrieved_info)

    system_prompt = f"""You are 'CityScout', a friendly guide to unique hangout spots.
Use the following verified facts from our database to help the user.
Always mention the name of the spot found in the brackets [Like This].

Database Facts:
- {context}

If you find a match, describe it enthusiastically! 
If not, help them brainstorm based on their interests."""

    messages = [{"role": "system", "content": system_prompt}]
    for msg in history:
        messages.append(msg)
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.7,
        top_p=0.9,
        stream=True
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


chatbot = gr.ChatInterface(
    respond,
    title="CityScout: Unique Spot Finder",
    description="Tell me your city or interests and I'll help you find cool places nearby!"
)

if __name__ == "__main__":
    chatbot.launch()