igortech committed on
Commit
a18d57c
·
verified ·
1 Parent(s): 8b86f79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -83
app.py CHANGED
@@ -1,104 +1,217 @@
1
  import json
 
 
 
2
  import os
3
- import difflib
 
4
  import gradio as gr
 
5
 
6
- # Path to your JSON data
 
 
7
  DATA_PATH = "quotes.json"
8
 
9
- # Load quotes
10
  def load_quotes():
11
  if os.path.exists(DATA_PATH):
12
- with open(DATA_PATH, "r", encoding="utf-8") as f:
13
- return json.load(f)
 
 
 
 
 
 
 
14
  return {}
15
 
16
  QUOTES = load_quotes()
17
 
18
- # Fuzzy match logic (improved with tokenization)
19
- def fuzzy_match(query, choices):
20
- tokens = query.lower().split()
21
- best_score = 0
22
- best_match = None
23
- for choice in choices:
24
- score = difflib.SequenceMatcher(None, query.lower(), choice.lower()).ratio()
25
- token_overlap = len(set(tokens) & set(choice.lower().split())) / max(len(tokens), 1)
26
- combined_score = (score + token_overlap) / 2
27
- if combined_score > best_score:
28
- best_score = combined_score
29
- best_match = choice
30
- return best_match, best_score
31
-
32
- # Handle user query
33
- def handle_query(user_message, category, sentiment, url_restrict):
34
- responses = []
35
-
36
- # Validate category
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if category not in QUOTES:
38
- return [{"role": "assistant", "content": "Category not found in dataset."}]
39
-
40
- # Collect relevant quotes
41
- choices = []
42
- if sentiment == "auto":
43
- for sent in QUOTES[category]:
44
- choices.extend(QUOTES[category][sent])
45
- else:
46
- choices = QUOTES[category].get(sentiment, [])
47
-
48
- if not choices:
49
- return [{"role": "assistant", "content": "No quotes available for this category/sentiment."}]
50
-
51
- # Fuzzy match query against quotes
52
- best_match, score = fuzzy_match(user_message, choices)
53
-
54
- # === 3-tier response ===
55
- # Tier 1: Summary (simple echo of category/topic)
56
- summary = f"**Topic Summary:** This question seems related to *{category.replace('_', ' ')}*."
57
-
58
- # Tier 2: "What real people say"
59
- if best_match and score > 0.3:
60
- details = f"**What real people say:**\n{best_match}"
61
- else:
62
- details = "**What real people say:**\nSorry, no close match found."
63
-
64
- # Tier 3: External article reference (stub)
65
- if url_restrict.strip():
66
- external = f"**Similar articles (restricted to {url_restrict}):**\n[Search results on {url_restrict}](https://www.google.com/search?q={user_message}+site:{url_restrict})"
 
 
 
 
 
 
 
 
 
67
  else:
68
- external = "**Similar articles:**\n[Search on Google](https://www.google.com/search?q=" + user_message.replace(" ", "+") + ")"
69
-
70
- responses.extend([
71
- {"role": "assistant", "content": summary},
72
- {"role": "assistant", "content": details},
73
- {"role": "assistant", "content": external}
74
- ])
75
-
76
- return responses
77
-
78
- # Build Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  with gr.Blocks() as demo:
80
- gr.Markdown("## 🎓 College Life Q&A Chatbot")
81
-
82
- with gr.Row():
83
- category = gr.Dropdown(choices=list(QUOTES.keys()), label="Category", interactive=True)
84
- sentiment = gr.Dropdown(choices=["auto", "positive", "negative"], value="auto", label="Sentiment", interactive=True)
85
-
86
- url_restrict = gr.Textbox(label="Restrict search to domain (optional)", placeholder="e.g., nytimes.com")
87
 
88
- chatbot = gr.Chatbot(label="Conversation", type="messages", height=400)
89
- msg = gr.Textbox(label="Ask a question")
90
 
91
  with gr.Row():
92
- clear_btn = gr.Button("Clear Chat")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- def respond(message, chat_history, category, sentiment, url_restrict):
95
- bot_msgs = handle_query(message, category, sentiment, url_restrict)
96
- chat_history.append({"role": "user", "content": message})
97
- chat_history.extend(bot_msgs)
98
- return "", chat_history
99
-
100
- msg.submit(respond, [msg, chatbot, category, sentiment, url_restrict], [msg, chatbot])
101
- clear_btn.click(lambda: [], None, chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  if __name__ == "__main__":
104
- demo.launch()
 
1
  import json
2
+ import random
3
+ import re
4
+ import string
5
  import os
6
+ import datetime
7
+ import csv
8
  import gradio as gr
9
+ from difflib import SequenceMatcher
10
 
11
+ # -----------------------------
12
+ # Config / data loading
13
+ # -----------------------------
14
  DATA_PATH = "quotes.json"
15
 
 
def load_quotes(path=None):
    """Load the quotes dataset from a JSON file.

    Args:
        path: Path of the JSON file to read. When omitted, the module-level
            ``DATA_PATH`` is used.

    Returns:
        The parsed JSON when the file exists, parses, and its root is an
        object (dict); otherwise an empty dict. A message describing the
        outcome is printed in every case.
    """
    if path is None:
        path = DATA_PATH

    if not os.path.exists(path):
        print("No dataset file found. Upload one via the UI.")
        return {}

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        print(f"Failed to load {path}: {e}")
        return {}

    if not isinstance(data, dict):
        # Report the real cause instead of claiming the file is missing.
        print(f"Ignoring {path}: JSON root must be an object.")
        return {}

    print(f"Loaded dataset from {path} with {len(data)} categories.")
    return data
28
 
29
  QUOTES = load_quotes()
30
 
31
+ # -----------------------------
32
+ # Text helpers
33
+ # -----------------------------
# Common function words dropped by tokenize().
STOPWORDS = {
    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
    "is","are","was","were","be","being","been","it","that","this","these","those","with",
    "as","by","from","about","into","over","after","before","up","down","out"
}

# Keyword hints used by infer_sentiment().
POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}

punct_re = re.compile(f"[{re.escape(string.punctuation)}]")

# Inflection endings stripped from a token before it is compared to the hints.
_HINT_SUFFIXES = ("ing", "ed", "es", "s", "d")


def normalize(text: str) -> str:
    """Lower-case *text* and replace each ASCII punctuation char with a space.

    ``None`` and empty input are treated as the empty string.
    """
    return punct_re.sub(" ", (text or "").lower())


def tokenize(text: str):
    """Return the normalized, whitespace-split words of *text* minus STOPWORDS."""
    return [t for t in normalize(text).split() if t and t not in STOPWORDS]


def _hint_forms(token: str) -> set:
    """Return *token* plus simple de-inflected forms ("loved" -> "love")."""
    forms = {token}
    for suffix in _HINT_SUFFIXES:
        stem_len = len(token) - len(suffix)
        if stem_len >= 3 and token.endswith(suffix):
            stem = token[:stem_len]
            forms.add(stem)
            forms.add(stem + "e")  # "loving" -> "lov" -> "love"
    return forms


def infer_sentiment(user_text: str) -> str:
    """Guess "positive" or "negative" from hint words in *user_text*.

    Hints are matched against whole words (after light suffix stripping),
    not substrings, so "unlikely" does not count as "like" and "hardly"
    does not count as "hard".

    Returns:
        "negative" when only negative hints are present; "positive" when
        only positive hints are present, and also as the default when the
        text has no hints or has both kinds.
    """
    forms = set()
    for token in tokenize(user_text):
        forms |= _hint_forms(token)

    has_pos = not forms.isdisjoint(POS_HINTS)
    has_neg = not forms.isdisjoint(NEG_HINTS)
    if has_neg and not has_pos:
        return "negative"
    return "positive"  # only-positive, mixed, or no hints at all
60
+
61
+ # -----------------------------
62
+ # Retrieval (with fuzzy match)
63
+ # -----------------------------
def similarity(a, b):
    """Return the difflib SequenceMatcher ratio (0.0 to 1.0) between *a* and *b*."""
    return SequenceMatcher(None, a, b).ratio()


def best_match_quote(category: str, sentiment: str, user_text: str) -> str:
    """Pick the stored quote most similar to *user_text*.

    Looks up ``QUOTES[category][sentiment]`` and compares each quote to the
    user text case-insensitively with ``similarity``.

    Returns:
        The best-scoring quote, or an explanatory message when the category
        or sentiment is missing, the pool holds no usable quotes, or the
        best score is below the 0.3 fuzzy threshold.
    """
    if category not in QUOTES:
        return f"No quotes found for category '{category}'."

    by_sentiment = QUOTES[category]
    # The dataset can come from an arbitrary uploaded JSON file, so do not
    # assume the category value is a mapping.
    if not isinstance(by_sentiment, dict) or sentiment not in by_sentiment:
        return f"No quotes found for sentiment '{sentiment}' in category '{category}'."

    pool = by_sentiment[sentiment]
    candidates = (
        [q for q in pool if isinstance(q, str)]
        if isinstance(pool, (list, tuple))
        else []
    )
    if not candidates:
        return f"No quotes available in '{category}' → '{sentiment}'."

    needle = (user_text or "").lower()  # computed once, not per quote
    scored = [(similarity(needle, quote.lower()), quote) for quote in candidates]
    # max() keeps the first of equally scored quotes, like a strict ">" scan.
    best_score, best_quote = max(scored, key=lambda pair: pair[0])

    if best_score < 0.3:  # fuzzy threshold
        return "I don’t have data on that specific question."
    return best_quote
88
+
89
+ # -----------------------------
90
+ # Gradio callbacks
91
+ # -----------------------------
conversation_log = []  # keep all turns for export


def respond(message, history, category, sentiment_choice):
    """Chat callback: answer *message* with the best matching quote.

    Args:
        message: Text entered by the user.
        history: Chat history as a list of (user, bot) pairs; ``None`` is
            treated as an empty history.
        category: Selected dataset category.
        sentiment_choice: "auto" to infer the sentiment from the message,
            otherwise the sentiment key to use as-is.

    Returns:
        A ``("", history)`` pair: the empty string clears the input box and
        ``history`` carries the new turn. Blank messages are ignored.
        Answered turns are also appended to ``conversation_log``; the
        "no dataset" and "no category" notices are not logged.
    """
    if history is None:
        history = []

    # Nothing to answer: leave the conversation and the log untouched.
    if not (message or "").strip():
        return "", history

    if not QUOTES:
        bot = "No dataset loaded. Please upload a JSON file first."
        history.append((message, bot))
        return "", history

    if not category:
        bot = "Please select a category."
        history.append((message, bot))
        return "", history

    if sentiment_choice == "auto":
        sent = infer_sentiment(message)
    else:
        sent = sentiment_choice

    bot = best_match_quote(category, sent, message)
    history.append((message, bot))

    # log turn for export
    conversation_log.append({
        "timestamp": datetime.datetime.now().isoformat(),
        "category": category,
        "sentiment": sent,
        "user_message": message,
        "bot_response": bot,
    })

    return "", history
123
+
def clear_chat():
    """Return an empty history for the chatbot output.

    An empty list is returned rather than ``None`` so the cleared state has
    the same type that ``respond`` appends to.
    """
    return []
126
+
def upload_json(filepath):
    """Replace the in-memory dataset with an uploaded JSON file.

    Args:
        filepath: Path of the uploaded file.

    Returns:
        A ``(status, category_update)`` pair. On success ``QUOTES`` is
        replaced and the update carries the sorted category names with the
        first one selected. On failure ``QUOTES`` is left as it was and the
        update is a no-op, so the dropdown keeps matching the dataset that
        is still loaded.
    """
    global QUOTES

    if not filepath:
        return "No file selected.", gr.update()

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        return f"Error loading file: {e}", gr.update()

    if not isinstance(data, dict):
        return "Upload failed: JSON root must be an object.", gr.update()

    QUOTES = data
    cats = sorted(QUOTES)
    status = f"Loaded {len(cats)} categories."
    return status, gr.update(choices=cats, value=(cats[0] if cats else None))
140
+
def download_current():
    """Write the dataset currently in memory to a JSON file.

    The in-memory ``QUOTES`` is always serialized, so the export reflects a
    dataset uploaded through the UI rather than whatever file happens to be
    on disk.

    Returns:
        The path of the written file ("quotes_export.json"), or ``None``
        when writing fails (the error is printed).
    """
    export_path = "quotes_export.json"
    try:
        with open(export_path, "w", encoding="utf-8") as f:
            json.dump(QUOTES, f, indent=2, ensure_ascii=False)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error exporting dataset: {e}")
        return None
    return export_path
152
+
def export_conversation():
    """Write ``conversation_log`` to "conversation_log.csv".

    The file gets a header row followed by one row per logged turn, with
    the columns timestamp, category, sentiment, user_message, bot_response.

    Returns:
        The CSV filename, or ``None`` when writing fails (the error is
        printed).
    """
    filename = "conversation_log.csv"
    fieldnames = ["timestamp", "category", "sentiment", "user_message", "bot_response"]
    rows = list(conversation_log)  # snapshot; the log may grow during the write
    try:
        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
    except (OSError, csv.Error) as e:
        print(f"Error exporting CSV: {e}")
        return None
    return filename
165
+
166
+ # -----------------------------
167
+ # UI
168
+ # -----------------------------
# NOTE(review): the diff view dropped indentation, so the nesting of
# components inside the rows below is reconstructed -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Similarity Matching")

    initial_categories = sorted(list(QUOTES.keys()))

    with gr.Row():
        category = gr.Dropdown(
            label="Category",
            choices=initial_categories,
            value=(initial_categories[0] if initial_categories else None)
        )
        sentiment = gr.Dropdown(
            label="Sentiment",
            choices=["auto", "positive", "negative"],
            value="auto"
        )

    chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
    send = gr.Button("Send")
    clear = gr.Button("Clear")

    with gr.Row():
        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
        upload_status = gr.Textbox(label="Upload status", interactive=False)
        downloader = gr.File(label="Download current dataset", interactive=False)
        csv_exporter = gr.File(label="Export conversation (.csv)", interactive=False)

    with gr.Row():
        # The File components start empty, so there is nothing to download
        # until a callback fills them; these buttons do that.
        download_btn = gr.Button("Prepare dataset download")
        export_btn = gr.Button("Export conversation")

    # Wire events
    msg.submit(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
    send.click(respond, [msg, chatbot, category, sentiment], [msg, chatbot])
    clear.click(clear_chat, None, chatbot, queue=False)

    uploader.upload(upload_json, uploader, [upload_status, category])
    # Each callback returns a file path, which becomes the File's value.
    download_btn.click(download_current, None, downloader)
    export_btn.click(export_conversation, None, csv_exporter)
205
+
206
+ # -----------------------------
207
+ # Startup log
208
+ # -----------------------------
print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
if QUOTES:
    # Per-category summary of how many quotes were loaded.
    for cat, sents in QUOTES.items():
        if not isinstance(sents, dict):
            # A malformed category must not abort startup.
            print(f" - {cat}: unexpected entry of type {type(sents).__name__}")
            continue
        p = len(sents.get("positive") or [])
        n = len(sents.get("negative") or [])
        print(f" - {cat}: {p} positive, {n} negative")

if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)