Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Files Community

sandbox_dev / app.py

igortech

Update app.py

b4686d0 verified 6 months ago

raw

history blame contribute delete

7.04 kB

	import json
	import os
	import re
	import csv
	import tempfile
	from rapidfuzz import fuzz
	import datetime
	import gradio as gr

	# -----------------------------
	# Config / data loading
	# -----------------------------
	# Default dataset location, resolved relative to the working directory.
	DATA_PATH = "quotes.json"


	def load_quotes():
	    """Load the quotes dataset from ``DATA_PATH``.

	    Returns:
	        The parsed JSON object when the file exists, can be read and parsed,
	        and has an object (dict) at its root; otherwise an empty dict.

	    A missing, unreadable, malformed or wrongly-shaped file never raises:
	    the specific reason is printed and ``{}`` is returned so the app can
	    still start and accept an upload through the UI.
	    """
	    if not os.path.exists(DATA_PATH):
	        print("No dataset file found. Upload one via the UI.")
	        return {}

	    # Keep the try body to the only statements that can fail on bad input.
	    try:
	        with open(DATA_PATH, "r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError) as e:
	        # ValueError covers both JSON syntax errors and invalid UTF-8.
	        print(f"Failed to load {DATA_PATH}: {e}")
	        return {}

	    if not isinstance(data, dict):
	        print(f"Ignoring {DATA_PATH}: JSON root must be an object.")
	        return {}

	    print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.")
	    return data


	# Dataset currently held in memory; loaded once at import time.
	QUOTES = load_quotes()

	# -----------------------------
	# Text helpers
	# -----------------------------
	# Common English function words that carry no topical meaning.
	STOPWORDS = {
	    # articles, conjunctions and prepositions
	    "the", "a", "an", "and", "or", "but", "if", "then",
	    "so", "than", "to", "of", "in", "on", "at", "for",
	    # forms of "to be", pronouns and determiners
	    "is", "are", "was", "were", "be", "being", "been",
	    "it", "that", "this", "these", "those", "with",
	    # further prepositions and particles
	    "as", "by", "from", "about", "into", "over",
	    "after", "before", "up", "down", "out",
	}

	# Words hinting at a favourable opinion.
	POS_HINTS = {
	    "good", "great", "love", "like", "enjoy", "awesome",
	    "amazing", "nice", "positive", "best", "fantastic", "excellent",
	}

	# Words hinting at an unfavourable opinion.
	NEG_HINTS = {
	    "bad", "hate", "dislike", "worst", "awful", "terrible", "negative",
	    "poor", "meh", "gross", "unsafe", "hard", "difficult",
	}

	# Character class matching any single ASCII punctuation mark. The literal is
	# a raw string so the backslash is taken verbatim; a plain literal would
	# contain invalid escape sequences, which newer Python versions warn about.
	punct_re = re.compile(
	    "[{}]".format(re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""))
	)


	def normalize(text: str) -> str:
	    """Lower-case *text* and replace each ASCII punctuation mark with a space.

	    ``None`` and the empty string both yield ``""``. Runs of whitespace are
	    not collapsed, so adjacent punctuation produces adjacent spaces.
	    """
	    return punct_re.sub(" ", (text or "").lower())

	def tokenize(text: str):
	    """Return the normalized words of *text* with stopwords removed.

	    The text is passed through ``normalize`` and split on whitespace; words
	    found in ``STOPWORDS`` are dropped and the remaining order is preserved.
	    """
	    words = normalize(text).split()
	    # str.split() without arguments never yields empty strings, so the
	    # stopword test is the only filter required.
	    return [word for word in words if word not in STOPWORDS]

	def infer_sentiment(user_text: str) -> str:
	    """Classify *user_text* as ``"positive"`` or ``"negative"`` from hint words.

	    Each whitespace-separated word of the normalized text is compared with
	    the entries of ``POS_HINTS`` and ``NEG_HINTS``. A hint matches when a
	    word starts with it, so inflected forms such as "loved" or "hated"
	    still count, while a hint buried inside an unrelated word does not
	    ("dislike" is not "like", "whatever" is not "hate").

	    Returns:
	        ``"negative"`` only when a negative hint matches and no positive
	        hint does. Positive-only, mixed and neutral text all return
	        ``"positive"``.
	    """
	    words = normalize(user_text).split()
	    # str.startswith accepts a tuple of alternatives, giving one call per word.
	    pos_prefixes = tuple(POS_HINTS)
	    neg_prefixes = tuple(NEG_HINTS)
	    has_pos = any(word.startswith(pos_prefixes) for word in words)
	    has_neg = any(word.startswith(neg_prefixes) for word in words)
	    if has_neg and not has_pos:
	        return "negative"
	    return "positive"

	# -----------------------------
	# Retrieval
	# -----------------------------
	# Lowest fuzzy score a quote must reach to be accepted as an answer.
	_MIN_MATCH_SCORE = 30


	def _iter_quote_texts(category=None):
	    """Yield each non-blank quote string, optionally from one category only.

	    Entries that are not dicts holding a string under "quote" are skipped
	    instead of raising, because an uploaded dataset is only checked for
	    having an object at its root.
	    """
	    if category is None:
	        pools = QUOTES.values()
	    else:
	        pools = [QUOTES.get(category, [])]
	    for entries in pools:
	        if not isinstance(entries, list):
	            continue
	        for entry in entries:
	            text = entry.get("quote") if isinstance(entry, dict) else None
	            if isinstance(text, str) and text.strip():
	                yield text


	def _find_best_quote(user_text, category=None):
	    """Return the best fuzzy match for *user_text*, or None if none qualifies."""
	    query = (user_text or "").lower()  # lower-cased once, not per quote
	    best_score = 0
	    best_quote = None
	    for text in _iter_quote_texts(category):
	        score = fuzz.partial_ratio(query, text.lower())
	        if score > best_score:
	            best_score, best_quote = score, text
	    if best_score < _MIN_MATCH_SCORE:
	        return None
	    return best_quote


	def _first_sentence(text):
	    """Return the text up to the first ". " break, ending in a single period."""
	    head, sep, _ = text.partition(". ")
	    # Without a sentence break the text is already one sentence; returning
	    # it unchanged avoids appending a second period.
	    return head + "." if sep else text


	def best_match_quote(user_text: str, category=None) -> str:
	    """Return the quote that best matches *user_text*.

	    Args:
	        user_text: The user's question.
	        category: Optional dataset category to search. ``None`` (the
	            default) searches every category; a category that is not in
	            the dataset matches nothing.

	    Returns:
	        The best-scoring quote, or ``"No data about '<user_text>'"`` when no
	        quote reaches the minimum score.
	    """
	    found = _find_best_quote(user_text, category)
	    if found is None:
	        return f"No data about '{user_text}'"
	    return found

	# -----------------------------
	# Gradio callbacks
	# -----------------------------
	def respond(message, history, category):
	    """Answer *message* and append the exchange to the chat history.

	    Args:
	        message: Text submitted by the user.
	        history: List of ``{"role", "content"}`` message dicts; ``None`` is
	            treated as an empty conversation.
	        category: Selected dataset category; the search is limited to it.

	    Returns:
	        ``("", history)``: an empty string for the message box and the
	        updated history. A blank message leaves the history unchanged.
	    """
	    if history is None:
	        history = []
	    if not (message or "").strip():
	        return "", history

	    if not QUOTES:
	        bot_text = "No dataset loaded. Please upload a JSON file first."
	    elif not category:
	        bot_text = "Please select a category."
	    else:
	        quote = _find_best_quote(message, category)
	        if quote is None:
	            # Say it once; there is no quote to summarise or repeat.
	            bot_text = f"No data about '{message}'"
	        else:
	            summary = _first_sentence(quote)
	            bot_text = f"Summary:\n{summary}\n\nWhat real people say:\n{quote}"

	    history.append({"role": "user", "content": message})
	    history.append({"role": "assistant", "content": bot_text})
	    return "", history

	def clear_chat():
	    """Return ``None``, the value the Clear button sends to the chat display."""
	    return None

	def upload_json(filepath):
	    """Replace the in-memory dataset with the JSON file at *filepath*.

	    Args:
	        filepath: Path of the uploaded file.

	    Returns:
	        ``(status, dropdown_update)``: a status string and an update for the
	        category dropdown. On success the dropdown receives the sorted
	        category names with the first one selected. On failure it receives
	        an empty update and is left as it was, because the previously
	        loaded dataset is still in use.

	    Side effects:
	        On success rebinds the globals ``QUOTES`` (to the parsed data) and
	        ``DATA_PATH`` (to the file's base name).
	    """
	    global QUOTES, DATA_PATH
	    try:
	        with open(filepath, "r", encoding="utf-8") as f:
	            data = json.load(f)
	    except Exception as e:
	        # UI callback boundary: report any read/parse failure in the status
	        # box instead of letting it propagate.
	        return f"Error loading file: {e}", gr.update()

	    if not isinstance(data, dict):
	        return "Upload failed: JSON root must be an object.", gr.update()

	    QUOTES = data
	    DATA_PATH = os.path.basename(filepath)
	    cats = sorted(QUOTES)
	    status = f"Loaded {len(cats)} categories from {DATA_PATH}."
	    return status, gr.update(choices=cats, value=(cats[0] if cats else None))

	def download_current_json():
	    """Write the current dataset to a temporary ``.json`` file.

	    Returns:
	        The path of the written file. It is created with ``delete=False``
	        so it still exists after this function returns; nothing here
	        removes it afterwards.
	    """
	    # Write through the temporary file's own handle so it is closed on exit
	    # rather than left open next to a second handle on the same path.
	    with tempfile.NamedTemporaryFile(
	        "w", delete=False, suffix=".json", encoding="utf-8"
	    ) as tmp:
	        json.dump(QUOTES, tmp, indent=2, ensure_ascii=False)
	    return tmp.name

	def download_conversation_csv(history):
	    """Export the conversation to a temporary ``.csv`` file.

	    Args:
	        history: Iterable of message dicts read via their ``"role"`` and
	            ``"content"`` keys. ``None`` or an empty history produces a
	            file containing only the header row.

	    Returns:
	        The path of the written file, whose header row is ``role,message``.
	        It is created with ``delete=False`` and is not removed here.
	    """
	    # newline="" lets the csv module control line endings itself.
	    with tempfile.NamedTemporaryFile(
	        "w", delete=False, suffix=".csv", newline="", encoding="utf-8"
	    ) as tmp:
	        writer = csv.writer(tmp)
	        writer.writerow(["role", "message"])
	        writer.writerows(
	            [entry.get("role"), entry.get("content")] for entry in history or []
	        )
	    return tmp.name

	# -----------------------------
	# UI
	# -----------------------------
	with gr.Blocks() as demo:
	    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Fuzzy Matching")

	    # Categories available at startup, in alphabetical order.
	    initial_categories = sorted(QUOTES)

	    with gr.Row():
	        category = gr.Dropdown(
	            choices=initial_categories,
	            value=initial_categories[0] if initial_categories else None,
	            label="Category",
	        )

	    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
	    msg = gr.Textbox(
	        label="Your message",
	        placeholder="Ask something like: 'Is food good in college?'",
	        autofocus=True,
	    )
	    send = gr.Button("Send")
	    clear = gr.Button("Clear")

	    # Dataset upload and its status line.
	    with gr.Row():
	        upload_btn = gr.File(
	            label="Upload dataset (.json)",
	            file_types=[".json"],
	            type="filepath",
	        )
	        upload_status = gr.Textbox(label="Upload status", interactive=False)

	    # Each download button fills the File component that offers the result.
	    with gr.Row():
	        download_json_btn = gr.Button("Download current dataset (.json)")
	        download_csv_btn = gr.Button("Export conversation to CSV")
	        download_json_file = gr.File(label="JSON download")
	        download_csv_file = gr.File(label="CSV download")

	    # Events: pressing Enter in the textbox and clicking Send do the same thing.
	    msg.submit(fn=respond, inputs=[msg, chatbot, category], outputs=[msg, chatbot])
	    send.click(fn=respond, inputs=[msg, chatbot, category], outputs=[msg, chatbot])
	    clear.click(fn=clear_chat, inputs=None, outputs=chatbot, queue=False)
	    upload_btn.upload(
	        fn=upload_json,
	        inputs=upload_btn,
	        outputs=[upload_status, category],
	    )

	    download_json_btn.click(download_current_json, None, download_json_file)
	    download_csv_btn.click(download_conversation_csv, chatbot, download_csv_file)

	# -----------------------------
	# Startup log
	# -----------------------------
	# Timestamped banner followed by one line per loaded category.
	print(f"===== Application Startup at {datetime.datetime.now():%Y-%m-%d %H:%M:%S} =====")
	# Looping over an empty dataset prints nothing, so no emptiness check is needed.
	for cat, entries in QUOTES.items():
	    print(" - {}: {} entries".format(cat, len(entries)))

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(server_port=7860, server_name="0.0.0.0")