igortech committed on
Commit
f008056
·
verified ·
1 Parent(s): db4315a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -170
app.py CHANGED
@@ -5,6 +5,7 @@ import string
5
  import os
6
  import datetime
7
  import difflib
 
8
  import gradio as gr
9
 
10
  # -----------------------------
@@ -12,204 +13,169 @@ import gradio as gr
12
  # -----------------------------
13
  DATA_PATH = "quotes.json"
14
 
15
def load_quotes(path=None):
    """Load the quotes dataset from a JSON file.

    Args:
        path: File to read. When omitted, the module-level ``DATA_PATH`` is used.

    Returns:
        A ``(data, source_path)`` tuple. ``data`` is always a dict that holds a
        ``"staged_responses"`` list. ``source_path`` is the path that was read,
        or ``None`` when the empty fallback dataset is returned (file missing,
        unreadable, invalid JSON, or a JSON root that is not an object).
    """
    path = DATA_PATH if path is None else path
    if os.path.exists(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Failed to load {path}: {e}")
        else:
            if isinstance(data, dict):
                # Ensure the staged_responses bucket always exists.
                data.setdefault("staged_responses", [])
                print(f"Loaded dataset from {path} with {len(data)} categories.")
                return data, path
            print(f"Failed to load {path}: JSON root must be an object.")
    # Fallback: empty dataset with no backing file.
    return {"staged_responses": []}, None
29
-
30
- QUOTES, DATA_PATH = load_quotes()
31
 
32
  # -----------------------------
33
- # Text helpers
 
 
 
34
  # -----------------------------
35
# Words that tokenize() drops before messages and quotes are compared.
STOPWORDS = {
    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
    "is","are","was","were","be","being","been","it","that","this","these","those","with",
    "as","by","from","about","into","over","after","before","up","down","out"
}

# Words whose presence makes infer_sentiment() lean positive / negative.
POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}

# Matches any single character from string.punctuation.
punct_re = re.compile(f"[{re.escape(string.punctuation)}]")


def normalize(text: str) -> str:
    """Lower-case *text* and replace each punctuation character with a space.

    ``None`` (or any falsy value) is treated as the empty string.
    """
    return punct_re.sub(" ", (text or "").lower())


def tokenize(text: str):
    """Return the normalized, whitespace-split words of *text* minus STOPWORDS."""
    return [t for t in normalize(text).split() if t not in STOPWORDS]


def infer_sentiment(user_text: str) -> str:
    """Guess whether *user_text* is ``"positive"`` or ``"negative"``.

    Hint words are matched against whole words of the normalized text, not
    substrings, so "dislike" no longer counts as "like" and "hardly" no longer
    counts as "hard".

    Returns:
        ``"negative"`` only when negative hints appear without positive ones;
        ``"positive"`` in every other case (including no hints or mixed hints).
    """
    words = set(normalize(user_text).split())
    has_pos = not words.isdisjoint(POS_HINTS)
    has_neg = not words.isdisjoint(NEG_HINTS)
    if has_neg and not has_pos:
        return "negative"
    # Positive-only, mixed, and hint-free messages all default to positive.
    return "positive"
61
 
62
- # -----------------------------
63
- # Retrieval with fuzzy matching
64
- # -----------------------------
65
def best_match_quote(category: str, sentiment: str, user_text: str) -> str:
    """Return the quote in *category* that best overlaps with *user_text*.

    Each quote is scored by the number of tokens it shares with the message.
    When a quote shares no exact token, the score instead counts message
    tokens that have a close match (difflib cutoff 0.8) among the quote's
    tokens. The first quote with the highest score is returned.

    Args:
        category: Key into the module-level ``QUOTES`` dataset.
        sentiment: Accepted for interface compatibility; not used in scoring.
        user_text: The user's message.

    Returns:
        The chosen quote text, or an explanatory message when the category is
        unknown or holds no entry with non-empty quote text.
    """
    if category not in QUOTES:
        return f"No quotes found for category '{category}'."

    # Only entries carrying non-empty quote text can be returned. Filtering up
    # front avoids picking an empty quote and then calling random.choice on an
    # empty list (e.g. for the staged_responses bucket, whose entries have no
    # "quote" key).
    candidates = [
        entry["quote"]
        for entry in (QUOTES[category] or [])
        if isinstance(entry, dict)
        and isinstance(entry.get("quote"), str)
        and entry["quote"]
    ]
    if not candidates:
        return f"No quotes available in '{category}'."

    q_tokens = set(tokenize(user_text))
    best_score = -1
    best_quote = None

    for quote in candidates:
        qtoks = set(tokenize(quote))
        score = len(q_tokens & qtoks)
        # Fuzzy matching fallback when no token matches exactly.
        if score == 0:
            score = sum(
                1
                for word in q_tokens
                if difflib.get_close_matches(word, qtoks, n=1, cutoff=0.8)
            )
        if score > best_score:
            best_score = score
            best_quote = quote

    return best_quote
92
 
93
  # -----------------------------
94
- # Gradio callbacks
95
  # -----------------------------
96
def respond(message, history, category, sentiment_choice):
    """Chat callback: append one (message, reply) exchange to *history*.

    Args:
        message: Text the user submitted.
        history: List of ``(user, bot)`` tuples shown in the chatbot; ``None``
            is treated as an empty conversation.
        category: Selected dataset category, or a falsy value when none is set.
        sentiment_choice: ``"auto"`` to infer sentiment from the message, or an
            explicit sentiment label that is passed through.

    Returns:
        ``("", history)`` — an empty string for the input box and the updated
        history.
    """
    if history is None:
        history = []

    # QUOTES always contains the "staged_responses" bucket, so a plain
    # truthiness test can never detect a missing dataset; look for at least
    # one real category instead.
    has_categories = any(name != "staged_responses" for name in QUOTES)

    if not has_categories:
        bot = "No dataset loaded. Please upload a JSON file first."
    elif not category:
        bot = "Please select a category."
    else:
        # Sentiment is not really used with interview-style data, but is kept
        # for compatibility with best_match_quote's signature.
        if sentiment_choice == "auto":
            sent = infer_sentiment(message)
        else:
            sent = sentiment_choice
        bot = best_match_quote(category, sent, message)

    history.append((message, bot))
    return "", history
116
-
117
def clear_chat():
    """Return ``None`` as the new value for the component wired to this callback."""
    return
119
-
120
def upload_json(filepath):
    """Replace the in-memory dataset with the contents of an uploaded JSON file.

    On success the module-level ``QUOTES`` and ``DATA_PATH`` are rebound. On
    any failure they are left untouched and the category dropdown is left
    unchanged too, so the UI keeps matching the dataset that is still loaded.

    Args:
        filepath: Path of the uploaded file.

    Returns:
        ``(status, dropdown_update)`` — a status message and a ``gr.update``
        for the category dropdown.
    """
    global QUOTES, DATA_PATH

    if not filepath:
        return "Upload failed: no file provided.", gr.update()

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers malformed JSON and undecodable bytes.
        return f"Error loading file: {e}", gr.update()

    if not isinstance(data, dict):
        return "Upload failed: JSON root must be an object.", gr.update()

    # Ensure the staged_responses bucket always exists.
    data.setdefault("staged_responses", [])

    QUOTES = data
    DATA_PATH = os.path.basename(filepath)
    cats = sorted(QUOTES)
    status = f"Loaded {len(cats)} categories from {DATA_PATH}."
    return status, gr.update(choices=cats, value=(cats[0] if cats else None))
138
-
139
def download_current():
    """Write the in-memory dataset, including staged responses, to a JSON file.

    Returns:
        The path of the written file (``"quotes_export.json"``), or ``None``
        when the dataset could not be written or serialized.
    """
    tmp = "quotes_export.json"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(QUOTES, f, indent=2, ensure_ascii=False)
    except (OSError, TypeError, ValueError) as e:
        # Report the failure instead of swallowing it silently.
        print(f"Failed to export dataset to {tmp}: {e}")
        return None
    return tmp
149
 
150
def stage_conversation(history, category):
    """Copy every exchange in *history* into ``QUOTES["staged_responses"]``.

    Args:
        history: Iterable of ``(user, bot)`` pairs; ``None`` is treated as an
            empty conversation.
        category: Category label recorded on each staged entry.

    Returns:
        A status message describing what was staged, or a prompt to select a
        category when none is given.
    """
    if not category:
        return "Please select a category to stage into."

    exchanges = history or []
    staged = QUOTES.setdefault("staged_responses", [])
    staged.extend(
        {"category": category, "user": msg, "bot": bot}
        for msg, bot in exchanges
    )
    return f"Staged {len(exchanges)} exchanges into 'staged_responses'."
 
 
 
 
 
 
 
163
 
164
  # -----------------------------
165
  # UI
166
  # -----------------------------
167
with gr.Blocks() as demo:
    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, Similarity Matching")

    # Categories known at startup seed the dropdown; the first is preselected.
    initial_categories = sorted(QUOTES)
    default_category = initial_categories[0] if initial_categories else None

    with gr.Row():
        category = gr.Dropdown(
            label="Category",
            choices=initial_categories,
            value=default_category,
        )
        sentiment = gr.Dropdown(
            label="Sentiment",
            choices=["auto", "positive", "negative"],
            value="auto",
        )

    chatbot = gr.Chatbot(label="Conversation", height=360, type="tuples")
    msg = gr.Textbox(
        label="Your message",
        placeholder="Ask something like: 'Is food good in college?'",
        autofocus=True,
    )
    send = gr.Button("Send")
    clear = gr.Button("Clear")

    with gr.Row():
        stage_btn = gr.Button("Stage Conversation to Category")
        stage_status = gr.Textbox(label="Stage status", interactive=False)

    with gr.Row():
        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
        upload_status = gr.Textbox(label="Upload status", interactive=False)
        downloader = gr.File(label="Download current dataset")

    # Wire events: pressing Enter and clicking Send share the same callback.
    chat_inputs = [msg, chatbot, category, sentiment]
    chat_outputs = [msg, chatbot]
    msg.submit(respond, chat_inputs, chat_outputs)
    send.click(respond, chat_inputs, chat_outputs)
    clear.click(clear_chat, None, chatbot, queue=False)

    stage_btn.click(stage_conversation, [chatbot, category], stage_status)

    uploader.upload(upload_json, uploader, [upload_status, category])
    downloader.download(download_current)
207
-
208
- # -----------------------------
209
- # Startup log
210
- # -----------------------------
211
# Log the start time, then one line per category whose value is a list.
startup_stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"===== Application Startup at {startup_stamp} =====")
if QUOTES:
    list_sizes = {
        cat: len(items)
        for cat, items in QUOTES.items()
        if isinstance(items, list)
    }
    for cat, size in list_sizes.items():
        print(f" - {cat}: {size} entries")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import os
6
  import datetime
7
  import difflib
8
+ import csv
9
  import gradio as gr
10
 
11
  # -----------------------------
 
13
  # -----------------------------
14
  DATA_PATH = "quotes.json"
15
 
16
def load_dataset(path=None):
    """Load the quotes dataset from a JSON file.

    Args:
        path: File to read. When omitted, the module-level ``DATA_PATH`` is used.

    Returns:
        The parsed JSON object as a dict, or an empty dict when the file is
        missing, unreadable, not valid JSON, or its root is not an object.
    """
    path = DATA_PATH if path is None else path
    if not os.path.exists(path):
        return {}
    try:
        # Read as UTF-8 explicitly rather than the platform default encoding.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers malformed JSON and undecodable bytes; fall back to
        # an empty dataset instead of crashing at import time.
        print(f"Failed to load {path}: {e}")
        return {}
    if not isinstance(data, dict):
        print(f"Failed to load {path}: JSON root must be an object.")
        return {}
    return data
21
+
22
def save_dataset(data, path=None):
    """Write *data* to a JSON file with two-space indentation.

    Args:
        data: JSON-serializable object to write.
        path: Destination file. When omitted, the module-level ``DATA_PATH``
            is used.
    """
    path = DATA_PATH if path is None else path
    # UTF-8 with ensure_ascii=False keeps non-ASCII quote text readable in the
    # file and independent of the platform's default encoding.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
25
+
26
+ dataset = load_dataset()
 
 
 
 
 
27
 
28
  # -----------------------------
29
+ # Conversation state
30
+ # -----------------------------
31
+ conversation_history = []
32
+
33
  # -----------------------------
34
+ # Response logic
35
+ # -----------------------------
36
def find_best_quote(category, user_input, data=None):
    """Return the quote in *category* most similar to *user_input*.

    Similarity is the ``difflib.SequenceMatcher`` ratio between the lower-cased
    input and each lower-cased quote. The first quote with the highest ratio
    wins; a quote with a ratio of zero is never returned.

    Args:
        category: Key of the category to search.
        user_input: The user's message; ``None`` is treated as empty.
        data: Mapping of category name to a list of ``{"quote": ...}`` entries.
            When omitted, the module-level ``dataset`` is searched.

    Returns:
        The best matching quote text, or ``None`` when the category is unknown
        or empty, or when no entry has any similarity to the input.
    """
    source = dataset if data is None else data
    quotes = source.get(category)
    if not quotes:
        return None

    needle = (user_input or "").lower()  # lower-case once, outside the loop
    best_match = None
    best_score = 0.0
    for entry in quotes:
        # Skip malformed entries instead of raising on a missing "quote" key.
        quote_text = entry.get("quote") if isinstance(entry, dict) else None
        if not isinstance(quote_text, str) or not quote_text:
            continue
        score = difflib.SequenceMatcher(None, needle, quote_text.lower()).ratio()
        if score > best_score:
            best_score = score
            best_match = quote_text

    return best_match
55
 
56
def respond(user_message, category, url_domain):
    """Build a three-part reply and record the exchange.

    The reply has a summary line naming the category, the best matching quote
    from ``find_best_quote`` (or a fallback sentence), and an optional pointer
    to *url_domain*. The user message and the reply are both appended to the
    module-level ``conversation_history``.

    Args:
        user_message: The question the user typed. Blank or ``None`` messages
            are ignored and nothing is recorded.
        category: Selected category, or a falsy value when none is selected.
        url_domain: Optional domain to mention in the reply.

    Returns:
        The module-level ``conversation_history`` list of
        ``{"role": ..., "content": ...}`` dicts.
    """
    if not user_message or not user_message.strip():
        return conversation_history

    if category:
        summary = f"It seems you're asking about {str(category).lower()}."
    else:
        # No category selected (e.g. empty dataset); avoid calling .lower()
        # on None.
        summary = "No category is selected."

    fusion = find_best_quote(category, user_message)
    if not fusion:
        fusion = "No matching experiences were found in this category."

    url_part = "No domain specified."
    if url_domain and url_domain.strip():
        url_part = f"Try searching this site for more: {url_domain.strip()}"

    response = (
        f"**What people say:**\n{summary}\n\n"
        f"**Combined insight:**\n{fusion}\n\n"
        f"**Related link:**\n{url_part}"
    )

    # Append to history (also used for CSV export).
    conversation_history.extend(
        [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": response},
        ]
    )
    return conversation_history
 
 
82
 
83
  # -----------------------------
84
+ # Utility: export conversation
85
  # -----------------------------
86
def export_conversation_csv():
    """Write the recorded conversation to a timestamped CSV file.

    The file has a ``Role, Message`` header followed by one row per entry in
    the module-level ``conversation_history``.

    Returns:
        The name of the written file, or ``None`` when the conversation is
        empty.
    """
    if not conversation_history:
        return None

    filename = f"conversation_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    # Explicit UTF-8 so non-ASCII messages do not depend on the platform's
    # default encoding; newline="" is required by the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Role", "Message"])
        writer.writerows(
            [msg["role"], msg["content"]] for msg in conversation_history
        )
    return filename
97
 
98
+ # -----------------------------
99
+ # Stage / save dataset
100
+ # -----------------------------
101
def stage_response_to_category(category, message):
    """Add *message* as a new quote under *category* in the in-memory dataset.

    Only the module-level ``dataset`` dict is updated; this function does not
    write anything to disk.

    Args:
        category: Category to add the quote to; created if it does not exist.
        message: Quote text. Surrounding whitespace is stripped; ``None`` or
            blank text is rejected.

    Returns:
        A status message.
    """
    text = (message or "").strip()
    if not text:
        return "Message is empty."
    if not category:
        # Without this guard a None category would become a dict key.
        return "Please select a category first."
    dataset.setdefault(category, []).append({"quote": text})
    return f"Staged response saved under '{category}'."
108
+
109
def download_current_dataset():
    """Dump the in-memory dataset to a timestamped JSON file.

    Returns:
        The name of the written file.
    """
    filename = f"dataset_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    # UTF-8 with ensure_ascii=False keeps non-ASCII quote text readable and
    # independent of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)
    return filename
114
 
115
  # -----------------------------
116
  # UI
117
  # -----------------------------
118
def clear_conversation():
    """Rebind the module-level conversation history to a new empty list.

    Returns:
        A separate empty list for the component wired to this callback.
    """
    global conversation_history
    conversation_history = list()
    return list()
122
 
123
with gr.Blocks() as demo:
    gr.Markdown("## College Life Q&A Chatbot")

    # Category names present at startup; the first one is preselected.
    category_names = list(dataset)

    with gr.Row():
        category_dropdown = gr.Dropdown(
            choices=category_names,
            label="Select Category",
            value=category_names[0] if category_names else None,
        )
        url_input = gr.Textbox(label="Limit search to domain (optional)")

    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")

    with gr.Row():
        msg = gr.Textbox(
            label="Your question",
            placeholder="Ask something about college life...",
        )
        send_btn = gr.Button("Send")

    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")
        export_btn = gr.Button("Export Conversation to CSV")

    with gr.Row():
        stage_box = gr.Textbox(label="Stage a response to selected category")
        stage_btn = gr.Button("Stage Conversation to Category")
        save_btn = gr.Button("Download Current Dataset")

    # --- Events ---
    send_btn.click(
        fn=respond,
        inputs=[msg, category_dropdown, url_input],
        outputs=[chatbot],
    )

    clear_btn.click(fn=clear_conversation, outputs=[chatbot])

    # The File outputs are created inline, at this point in the layout.
    export_btn.click(
        fn=export_conversation_csv,
        outputs=[gr.File(label="Download Conversation CSV")],
    )

    # The status message is written back into the staging textbox itself.
    stage_btn.click(
        fn=stage_response_to_category,
        inputs=[category_dropdown, stage_box],
        outputs=[stage_box],
    )

    save_btn.click(
        fn=download_current_dataset,
        outputs=[gr.File(label="Download Dataset JSON")],
    )

if __name__ == "__main__":
    demo.launch()