Spaces:
Sleeping
Sleeping
still having errors? fixed indentation
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import json
|
|
| 5 |
import numpy as np
|
| 6 |
import faiss
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
-
from bs4 import BeautifulSoup
|
| 9 |
|
| 10 |
hf_token = os.getenv("HF_Token")
|
| 11 |
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=hf_token)
|
|
@@ -19,12 +19,10 @@ def preprocess_text(text):
|
|
| 19 |
chunks.extend(i.split(". "))
|
| 20 |
|
| 21 |
cleaned_chunks = []
|
| 22 |
-
|
| 23 |
for chunk in chunks:
|
| 24 |
chunk = chunk.strip()
|
| 25 |
if len(chunk) > 0:
|
| 26 |
cleaned_chunks.append(chunk)
|
| 27 |
-
|
| 28 |
return cleaned_chunks
|
| 29 |
|
| 30 |
def prepare_docs():
|
|
@@ -35,17 +33,16 @@ def prepare_docs():
|
|
| 35 |
|
| 36 |
for item in raw_data:
|
| 37 |
soup = BeautifulSoup(item['popup'], 'html.parser')
|
|
|
|
| 38 |
raw_html_text = soup.get_text(separator=" ")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
all_processed_chunks.append(chunk)
|
| 44 |
|
| 45 |
return all_processed_chunks
|
| 46 |
|
| 47 |
processed_data = prepare_docs()
|
| 48 |
-
|
| 49 |
embeddings = embed_model.encode(processed_data)
|
| 50 |
index = faiss.IndexFlatL2(embeddings.shape[1])
|
| 51 |
index.add(np.array(embeddings).astype('float32'))
|
|
@@ -70,8 +67,9 @@ def respond(message, history):
|
|
| 70 |
|
| 71 |
messages = [{"role": "system", "content": system_prompt}]
|
| 72 |
|
| 73 |
-
for
|
| 74 |
-
messages.append(
|
|
|
|
| 75 |
|
| 76 |
messages.append({"role": "user", "content": message})
|
| 77 |
|
|
@@ -90,9 +88,9 @@ def respond(message, history):
|
|
| 90 |
|
| 91 |
chatbot = gr.ChatInterface(
|
| 92 |
respond,
|
| 93 |
-
type="messages",
|
| 94 |
title="CityScout: Unique Spot Finder",
|
| 95 |
-
description="Tell me your city
|
| 96 |
)
|
| 97 |
|
| 98 |
-
|
|
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
import faiss
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
+
from bs4 import BeautifulSoup
|
| 9 |
|
| 10 |
hf_token = os.getenv("HF_Token")
|
| 11 |
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=hf_token)
|
|
|
|
| 19 |
chunks.extend(i.split(". "))
|
| 20 |
|
| 21 |
cleaned_chunks = []
|
|
|
|
| 22 |
for chunk in chunks:
|
| 23 |
chunk = chunk.strip()
|
| 24 |
if len(chunk) > 0:
|
| 25 |
cleaned_chunks.append(chunk)
|
|
|
|
| 26 |
return cleaned_chunks
|
| 27 |
|
| 28 |
def prepare_docs():
|
|
|
|
| 33 |
|
| 34 |
for item in raw_data:
|
| 35 |
soup = BeautifulSoup(item['popup'], 'html.parser')
|
| 36 |
+
name = soup.find(class_='infobox-title').get_text() if soup.find(class_='infobox-title') else "Unknown Spot"
|
| 37 |
raw_html_text = soup.get_text(separator=" ")
|
| 38 |
+
|
| 39 |
+
chunks = preprocess_text(raw_html_text)
|
| 40 |
+
for chunk in chunks:
|
| 41 |
+
all_processed_chunks.append(f"[{name}]: {chunk}")
|
|
|
|
| 42 |
|
| 43 |
return all_processed_chunks
|
| 44 |
|
| 45 |
processed_data = prepare_docs()
|
|
|
|
| 46 |
embeddings = embed_model.encode(processed_data)
|
| 47 |
index = faiss.IndexFlatL2(embeddings.shape[1])
|
| 48 |
index.add(np.array(embeddings).astype('float32'))
|
|
|
|
| 67 |
|
| 68 |
messages = [{"role": "system", "content": system_prompt}]
|
| 69 |
|
| 70 |
+
for user_msg, assistant_msg in history:
|
| 71 |
+
if user_msg: messages.append({"role": "user", "content": user_msg})
|
| 72 |
+
if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg})
|
| 73 |
|
| 74 |
messages.append({"role": "user", "content": message})
|
| 75 |
|
|
|
|
| 88 |
|
| 89 |
chatbot = gr.ChatInterface(
|
| 90 |
respond,
|
|
|
|
| 91 |
title="CityScout: Unique Spot Finder",
|
| 92 |
+
description="Tell me your city or interests (e.g. 'mummies' or 'New York')!"
|
| 93 |
)
|
| 94 |
|
| 95 |
+
if __name__ == "__main__":
|
| 96 |
+
chatbot.launch()
|