igortech committed on
Commit
116ad34
·
verified ·
1 Parent(s): 2e57be7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -92
app.py CHANGED
@@ -1,9 +1,8 @@
1
  import json
2
- import random
3
- import difflib
4
  import os
5
  import csv
6
- import datetime
 
7
  import gradio as gr
8
 
9
  # -----------------------------
@@ -11,136 +10,175 @@ import gradio as gr
11
  # -----------------------------
12
  DATA_PATH = "quotes.json"
13
 
14
- if os.path.exists(DATA_PATH):
15
- with open(DATA_PATH, "r") as f:
16
- dataset = json.load(f)
17
- else:
18
- dataset = {"staged_responses": []}
 
 
 
 
 
 
19
 
 
20
 
21
  # -----------------------------
22
- # Helpers
23
  # -----------------------------
24
- def find_best_quotes(category, user_input, top_n=3, threshold=0.4):
25
- """Find top_n most similar quotes for a category or return fallback if none match well"""
 
 
 
 
 
26
  if category not in dataset or not dataset[category]:
27
  return [f"No data about {user_input} (unknown)."]
28
 
29
- quotes = [q["quote"] for q in dataset[category]]
30
- scores = [difflib.SequenceMatcher(None, user_input.lower(), q.lower()).ratio() for q in quotes]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- # Pair scores with quotes and sort
33
- scored_quotes = sorted(zip(scores, quotes), key=lambda x: x[0], reverse=True)
34
 
35
- best_score = scored_quotes[0][0] if scored_quotes else 0
36
  if best_score < threshold:
37
  return [f"No data about {user_input} (unknown)."]
38
 
39
- return [q for _, q in scored_quotes[:top_n]]
40
 
 
 
 
 
 
41
 
42
- def save_conversation_to_staged(messages, category):
43
- """Stage conversation under chosen category in dataset (downloadable)"""
44
- if not messages:
45
- return "No conversation to stage."
 
 
46
 
47
- convo_text = " ".join([msg["content"] for msg in messages if msg["role"] == "user" or msg["role"] == "assistant"])
 
48
 
49
- if category not in dataset:
50
- dataset[category] = []
51
 
52
- dataset[category].append({"quote": convo_text})
53
- return f"Conversation staged under {category}."
54
 
 
55
 
56
- def export_conversation_csv(messages):
57
- """Export current conversation as CSV and return filename"""
58
- if not messages:
59
- return None
60
 
61
- filename = f"conversation_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
62
- with open(filename, "w", newline="", encoding="utf-8") as f:
63
- writer = csv.writer(f)
64
- writer.writerow(["role", "content"])
65
- for msg in messages:
66
- writer.writerow([msg["role"], msg["content"]])
67
- return filename
68
 
 
 
69
 
70
- def download_dataset():
71
- """Save dataset to a JSON file and return filename"""
72
- filename = f"quotes_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
73
- with open(filename, "w", encoding="utf-8") as f:
74
- json.dump(dataset, f, indent=2, ensure_ascii=False)
75
- return filename
76
 
 
 
 
77
 
78
  # -----------------------------
79
- # Chatbot core
80
  # -----------------------------
81
- def chatbot_reply(user_input, history, category):
82
- """Handle user query and return chatbot response with updated history"""
83
- if not user_input.strip():
84
- return history, history
85
-
86
- # Find 3-fold response
87
- responses = find_best_quotes(category, user_input, top_n=3)
88
-
89
- # Format assistant reply
90
- reply = "\n---\n".join(responses)
91
 
92
- # Append to history
93
- history.append({"role": "user", "content": user_input})
94
- history.append({"role": "assistant", "content": reply})
 
 
 
95
 
96
- return history, history
 
97
 
 
 
 
 
 
98
 
99
- def clear_conversation():
100
- return [], []
 
 
101
 
 
102
 
103
  # -----------------------------
104
  # UI
105
  # -----------------------------
106
  with gr.Blocks() as demo:
107
- gr.Markdown("# 🎓 Campus Life Chatbot")
108
 
109
  with gr.Row():
110
- with gr.Column():
111
- category_dropdown = gr.Dropdown(
112
- choices=list(dataset.keys()),
113
- value="Food" if "Food" in dataset else None,
114
- label="Select Category",
115
- )
116
-
117
- chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
118
- user_input = gr.Textbox(
119
- placeholder="Type your message and press Enter",
120
- show_label=False,
121
- )
122
- send_btn = gr.Button("Send")
123
- clear_btn = gr.Button("Clear")
124
-
125
- export_csv_btn = gr.Button("📤 Export Conversation to CSV")
126
- stage_btn = gr.Button("Stage Conversation to Category")
127
- download_json_btn = gr.Button("💾 Download Current Dataset")
128
-
129
- export_status = gr.Label(label="Status", value="")
130
-
131
- # Events
132
- send_btn.click(chatbot_reply, [user_input, chatbot, category_dropdown], [chatbot, chatbot])
133
- user_input.submit(chatbot_reply, [user_input, chatbot, category_dropdown], [chatbot, chatbot])
134
 
135
- clear_btn.click(clear_conversation, outputs=[chatbot, chatbot])
136
 
137
- export_csv_btn.click(export_conversation_csv, [chatbot], outputs=export_status)
138
- stage_btn.click(save_conversation_to_staged, [chatbot, category_dropdown], outputs=export_status)
139
- download_json_btn.click(download_dataset, outputs=export_status)
 
 
 
 
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- # -----------------------------
143
- # Launch
144
- # -----------------------------
145
  if __name__ == "__main__":
146
  demo.launch()
 
1
  import json
 
 
2
  import os
3
  import csv
4
+ import re
5
+ from difflib import SequenceMatcher
6
  import gradio as gr
7
 
8
  # -----------------------------
 
10
  # -----------------------------
11
  DATA_PATH = "quotes.json"
12
 
13
def load_dataset(path=None):
    """Load the quotes dataset from a JSON file.

    Args:
        path: Location of the JSON file. When omitted, the module-level
            ``DATA_PATH`` is used.

    Returns:
        The parsed JSON content, or ``{"staged_responses": []}`` when the
        file does not exist.
    """
    source = DATA_PATH if path is None else path

    if not os.path.exists(source):
        print("No dataset found, starting with empty structure")
        return {"staged_responses": []}

    # Decode explicitly as UTF-8 so non-ASCII quotes load the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(source, "r", encoding="utf-8") as f:
        data = json.load(f)

    print(f"Loaded dataset from {source} with {len(data)} categories.")
    for cat, quotes in data.items():
        print(f" - {cat}: {len(quotes)} entries")
    return data
24
 
25
+ dataset = load_dataset()
26
 
27
  # -----------------------------
28
+ # Matching logic
29
  # -----------------------------
30
def normalize_text(s: str) -> str:
    """Lower-case *s* and squeeze each run of non-word characters into one space.

    Leading and trailing whitespace is removed from the result. A falsy
    value such as ``None`` or ``""`` yields the empty string.
    """
    lowered = s.lower() if s else ""
    pieces = re.split(r'\W+', lowered)
    return " ".join(pieces).strip()
32
+
33
def tokens(s: str):
    """Return the set of distinct, non-empty words of *s* after normalization."""
    words = normalize_text(s).split()
    return set(filter(None, words))
35
+
36
def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
    """Return up to *top_n* quotes from *category* that best match *user_input*.

    Each entry of ``dataset[category]`` is scored as follows:

    * if the query and the quote share at least one normalized word, the
      score is ``1.0 + shared_words / quote_words`` (always >= 1.0);
    * otherwise the score is the ``SequenceMatcher`` similarity ratio of the
      lower-cased query and quote (between 0.0 and 1.0).

    Args:
        category: Key of ``dataset`` to search.
        user_input: Free-text query; ``None`` is treated as an empty query.
        top_n: Maximum number of quotes to return.
        threshold: Minimum score a quote needs in order to be returned.

    Returns:
        A list of quote strings ordered from best to worst score, or a
        single-element list holding a "No data about ..." message when the
        category is unknown or empty, or no quote reaches *threshold*.
    """
    fallback = [f"No data about {user_input} (unknown)."]
    if category not in dataset or not dataset[category]:
        return fallback

    query = user_input or ""
    query_lower = query.lower()
    user_toks = tokens(query)
    scored = []

    for entry in dataset[category]:
        qtext = entry.get("quote", "") if isinstance(entry, dict) else ""
        # Entries without usable quote text (for example staged conversations,
        # which carry no "quote" key) must never be returned as an answer.
        # Without this check an empty query scored 1.0 against an empty quote.
        if not isinstance(qtext, str) or not qtext.strip():
            continue

        overlap = len(user_toks & tokens(qtext))
        if overlap:
            # Token overlap match
            score = 1.0 + overlap / max(1, len(tokens(qtext)))
        else:
            # Fuzzy fallback
            score = SequenceMatcher(None, query_lower, qtext.lower()).ratio()

        # Keep only entries that reach the threshold, so weak matches are not
        # used as padding next to a strong one.
        if score >= threshold:
            scored.append((score, qtext))

    if not scored:
        return fallback

    # list.sort is stable: quotes with equal scores keep their dataset order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [quote for _score, quote in scored[:top_n]]
64
 
65
+ # -----------------------------
66
+ # Response generation
67
+ # -----------------------------
68
def generate_response(category, user_input):
    """Build the three-part reply (summary, fusion, reference) for a query.

    Looks up the three best quotes for *user_input* in *category*. When the
    lookup yields only its single "No data ..." message, that message is
    echoed in the summary and fusion parts and the reference part is
    "Reference: None".

    Returns:
        A ``(summary, fusion, reference)`` tuple of strings.
    """
    quotes = find_best_quotes(category, user_input, top_n=3)

    nothing_found = len(quotes) == 1 and quotes[0].startswith("No data")
    if nothing_found:
        notice = quotes[0]
        return (f"Summary: {notice}", f"Fusion: {notice}", "Reference: None")

    topic = category.lower()
    joined_quotes = " ".join(quotes)

    # The three parts of the reply: 1. summary, 2. fusion, 3. reference.
    return (
        f"Summary: This is what people say about {topic}.",
        "Fusion: " + joined_quotes,
        f"Reference: Example article about {topic} - https://example.com/{topic}",
    )
88
 
89
+ # -----------------------------
90
+ # Gradio logic
91
+ # -----------------------------
92
# NOTE: module-level list, so the history is shared by every caller of
# chat() within this process.
conversation_history = []


def chat(user_input, category):
    """Answer *user_input* and append the exchange to the conversation history.

    Args:
        user_input: Text typed by the user. ``None``, empty or
            whitespace-only input is ignored.
        category: Dataset category used to look up quotes.

    Returns:
        The shared ``conversation_history`` list of
        ``{"role": ..., "content": ...}`` messages.
    """
    # Ignore blank submissions instead of recording an empty question and a
    # "No data about ..." answer for it.
    if not user_input or not user_input.strip():
        return conversation_history

    summary, fusion, reference = generate_response(category, user_input)

    # 3-fold response
    bot_response = f"{summary}\n\n{fusion}\n\n{reference}"

    conversation_history.append({"role": "user", "content": user_input})
    conversation_history.append({"role": "assistant", "content": bot_response})

    return conversation_history
 
 
104
 
105
def clear_conversation():
    """Empty the shared conversation history in place and return it."""
    del conversation_history[:]
    return conversation_history
108
 
109
  # -----------------------------
110
+ # CSV Export
111
  # -----------------------------
112
def export_conversation():
    """Write the conversation history to ``conversation.csv``.

    The file has a ``role,content`` header followed by one row per message.

    Returns:
        The name of the written file, or ``None`` when the history is empty.
    """
    if not conversation_history:
        return None

    filename = "conversation.csv"
    # Encode explicitly as UTF-8 so non-ASCII messages cannot raise
    # UnicodeEncodeError on platforms whose default encoding is narrower.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["role", "content"])
        writer.writerows(
            (msg["role"], msg["content"]) for msg in conversation_history
        )
    return filename
122
 
123
+ # -----------------------------
124
+ # Save staged responses
125
+ # -----------------------------
126
def stage_conversation(category):
    """Record the current conversation under ``dataset["staged_responses"]``.

    The staged entry stores *category* and a shallow copy of the conversation
    history. The whole dataset is then written to ``staged_responses.json``.

    Returns:
        The name of the written file, or ``None`` when the history is empty.
    """
    if not conversation_history:
        return None

    staged = dataset.setdefault("staged_responses", [])
    staged.append(
        {
            "category": category,
            "conversation": list(conversation_history),
        }
    )

    # Save to file for download
    staged_file = "staged_responses.json"
    with open(staged_file, "w") as out:
        json.dump(dataset, out, indent=2)

    return staged_file
145
 
146
  # -----------------------------
147
  # UI
148
  # -----------------------------
149
with gr.Blocks() as demo:
    gr.Markdown("## Campus Life Chatbot")

    with gr.Row():
        category = gr.Dropdown(
            choices=list(dataset.keys()),
            label="Select Category",
            # First category, or None when the dataset has no categories.
            value=next(iter(dataset), None),
        )

    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")

    with gr.Row():
        user_input = gr.Textbox(
            label="Type your message",
            placeholder="Ask about food, housing, professors...",
            scale=4,
        )
        send_btn = gr.Button("Send", scale=1)

    with gr.Row():
        clear_btn = gr.Button("Clear")
        export_btn = gr.Button("Export Conversation to CSV")
        stage_btn = gr.Button("Stage Conversation to Category")
        download_btn = gr.Button("Download Updated Dataset")

    def export_current_dataset():
        """Write the in-memory dataset to a JSON file and return its name.

        Serving DATA_PATH directly would hand out the original file, which
        may not exist and never contains conversations staged at runtime.
        """
        updated_file = "quotes_updated.json"
        with open(updated_file, "w", encoding="utf-8") as f:
            json.dump(dataset, f, indent=2, ensure_ascii=False)
        return updated_file

    # Event wiring
    send_btn.click(chat, [user_input, category], chatbot)
    user_input.submit(chat, [user_input, category], chatbot)
    clear_btn.click(clear_conversation, None, chatbot)
    export_btn.click(export_conversation, None, gr.File())
    stage_btn.click(stage_conversation, category, gr.File())
    download_btn.click(export_current_dataset, None, gr.File())


if __name__ == "__main__":
    demo.launch()