Spaces:
Sleeping
Sleeping
import json
import os

import faiss
import gradio as gr
import numpy as np
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
# Access token for the Hugging Face Inference API, read from the "HF_Token"
# environment variable. os.getenv returns None when the variable is unset.
hf_token = os.getenv("HF_Token")

# Client for the hosted chat model that generates the replies.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=hf_token)

# Sentence-embedding model; the same model must encode both the corpus and
# the user queries so their vectors are comparable.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
def preprocess_text(text):
    """Split raw text into trimmed, non-empty sentence-like chunks.

    The text is stripped, split on newlines, and each line is then split on
    the literal separator ". " (a period followed by a space). The separator
    is consumed by the split, so a chunk keeps a trailing period only when
    the period is the last character of its line.

    Args:
        text: The raw string to chunk. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of stripped, non-empty chunks in their original order.
    """
    if not text:
        return []

    cleaned_chunks = []
    for line in text.strip().split("\n"):
        for piece in line.split(". "):
            piece = piece.strip()
            # Blank lines and doubled separators produce empty pieces.
            if piece:
                cleaned_chunks.append(piece)
    return cleaned_chunks
def prepare_docs(path="spots.json"):
    """Load the spots file and turn every popup into labelled text chunks.

    The file is expected to hold a JSON array of objects, each with a
    ``popup`` field containing an HTML fragment. For every record the spot
    name is taken from the first element with class ``infobox-title``
    (falling back to "Unknown Spot"), the fragment's visible text is chunked
    with ``preprocess_text``, and each chunk is prefixed with ``[name]: ``.

    Args:
        path: Location of the JSON file. Defaults to ``spots.json`` resolved
            against the current working directory.

    Returns:
        A flat list of strings of the form ``"[<spot name>]: <chunk>"``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(path, "r", encoding="utf-8") as f:
        raw_data = json.load(f)

    all_processed_chunks = []
    for item in raw_data:
        # A record without popup HTML contributes no chunks instead of
        # aborting the whole load.
        soup = BeautifulSoup(item.get("popup") or "", "html.parser")

        title_tag = soup.find(class_="infobox-title")
        name = title_tag.get_text().strip() if title_tag else ""
        if not name:
            name = "Unknown Spot"

        raw_html_text = soup.get_text(separator=" ")
        for chunk in preprocess_text(raw_html_text):
            all_processed_chunks.append(f"[{name}]: {chunk}")
    return all_processed_chunks
# Build the retrieval corpus and its vector index once, at import time.
processed_data = prepare_docs()
if not processed_data:
    # An empty corpus gives no embedding dimension to size the index with;
    # fail here with a clear message rather than on the shape lookup below.
    raise RuntimeError(
        "No text chunks were produced from the spots data; "
        "cannot build the search index."
    )

# FAISS works on float32 matrices, so convert once and reuse the result.
embeddings = np.asarray(embed_model.encode(processed_data), dtype="float32")

# Exact (brute-force) L2 index sized to the embedding dimension.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)
def retrieve(query, k=3):
    """Return the stored chunks closest to ``query`` in embedding space.

    Args:
        query: The user's text to search for.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` entries of ``processed_data``, in the order the index
        returns them. Fewer are returned when the index holds fewer than
        ``k`` vectors; an empty list is returned when ``k`` is not positive
        or the index is empty.
    """
    # Never ask for more neighbours than the index holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    query_vec = np.asarray(embed_model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_vec, k)

    # FAISS marks missing neighbours with -1; as a Python index that would
    # silently select the last chunk, so drop those slots.
    return [processed_data[i] for i in indices[0] if i >= 0]
def _history_to_messages(history):
    """Convert Gradio chat history into plain role/content message dicts."""
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Forward only the fields a chat-completion request understands;
            # any extra keys on the history entry are dropped.
            if entry.get("role") and entry.get("content") is not None:
                messages.append(
                    {"role": entry["role"], "content": entry["content"]}
                )
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # Pair-style history: (user_text, assistant_text).
            user_text, assistant_text = entry
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append(
                    {"role": "assistant", "content": assistant_text}
                )
    return messages


def respond(message, history):
    """Stream a reply to ``message`` grounded in retrieved database facts.

    The chunks returned by ``retrieve(message)`` are embedded in the system
    prompt, the prior conversation is replayed, and the model's answer is
    streamed back.

    Args:
        message: The latest user message.
        history: Prior conversation, either as role/content dicts or as
            (user, assistant) pairs.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text.
    """
    retrieved_info = retrieve(message)
    context = "\n- ".join(retrieved_info)

    system_prompt = (
        "You are 'CityScout', a friendly guide to unique hangout spots.\n"
        "Use the following verified facts from our database to help the user.\n"
        "Always mention the name of the spot found in the brackets [Like This].\n"
        "Database Facts:\n"
        f"- {context}\n"
        "If you find a match, describe it enthusiastically! "
        "If not, help them brainstorm based on their interests."
    )

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.7,
        top_p=0.9,
        stream=True,
    ):
        # A streamed chunk may carry no choices at all; skip it rather than
        # index into an empty list.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
# Chat UI wired to the streaming `respond` generator.
chatbot = gr.ChatInterface(
    respond,
    title="CityScout: Unique Spot Finder",
    description="Tell me your city or interests and I'll help you find cool places nearby!",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    chatbot.launch()