igortech committed on
Commit
b2c8e1d
·
verified ·
1 Parent(s): 8885a6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -149
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import json
2
- import os
3
  import csv
4
- import re
5
- from difflib import SequenceMatcher
6
  import gradio as gr
7
 
8
  # -----------------------------
@@ -10,175 +9,123 @@ import gradio as gr
10
  # -----------------------------
11
  DATA_PATH = "quotes.json"
12
 
13
- def load_dataset():
14
- if os.path.exists(DATA_PATH):
15
- with open(DATA_PATH, "r") as f:
16
- data = json.load(f)
17
- print(f"Loaded dataset from {DATA_PATH} with {len(data.keys())} categories.")
18
- for cat, quotes in data.items():
19
- print(f" - {cat}: {len(quotes)} entries")
20
- return data
21
- else:
22
- print("No dataset found, starting with empty structure")
23
- return {"staged_responses": []}
24
-
25
- dataset = load_dataset()
26
-
27
- # -----------------------------
28
- # Matching logic
29
- # -----------------------------
30
- def normalize_text(s: str) -> str:
31
- return re.sub(r'\W+', ' ', (s or "").lower()).strip()
32
-
33
- def tokens(s: str):
34
- return set(t for t in normalize_text(s).split() if t)
35
-
36
- def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
37
- if category not in dataset or not dataset[category]:
38
- return [f"No data about {user_input} (unknown)."]
39
-
40
- user_toks = tokens(user_input)
41
- scored = []
42
-
43
- for entry in dataset[category]:
44
- qtext = entry.get("quote", "")
45
- q_toks = tokens(qtext)
46
-
47
- # Token overlap match
48
- overlap = len(user_toks & q_toks)
49
- if overlap > 0:
50
- score = 1.0 + (overlap / max(1, len(q_toks)))
51
- else:
52
- # Fuzzy fallback
53
- score = SequenceMatcher(None, user_input.lower(), qtext.lower()).ratio()
54
-
55
- scored.append((score, qtext))
56
 
57
- scored.sort(key=lambda x: x[0], reverse=True)
58
- best_score = scored[0][0] if scored else 0.0
59
-
60
- if best_score < threshold:
61
- return [f"No data about {user_input} (unknown)."]
62
-
63
- return [q for _s, q in scored[:top_n]]
64
 
65
  # -----------------------------
66
- # Response generation
67
  # -----------------------------
68
- def generate_response(category, user_input):
69
- best_quotes = find_best_quotes(category, user_input, top_n=3)
70
-
71
- if len(best_quotes) == 1 and best_quotes[0].startswith("No data"):
72
- return (
73
- f"Summary: {best_quotes[0]}",
74
- f"Fusion: {best_quotes[0]}",
75
- f"Reference: None"
76
- )
77
-
78
- # 1. Summary
79
- summary = f"Summary: This is what people say about {category.lower()}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- # 2. Fusion
82
- fusion = "Fusion: " + " ".join(best_quotes)
 
83
 
84
- # 3. Reference
85
- reference = f"Reference: Example article about {category.lower()} - https://example.com/{category.lower()}"
86
 
87
- return summary, fusion, reference
88
 
89
- # -----------------------------
90
- # Gradio logic
91
- # -----------------------------
92
- conversation_history = []
93
 
94
- def chat(user_input, category):
95
- summary, fusion, reference = generate_response(category, user_input)
96
 
97
- # 3-fold response
98
- bot_response = f"{summary}\n\n{fusion}\n\n{reference}"
99
 
100
- conversation_history.append({"role": "user", "content": user_input})
101
- conversation_history.append({"role": "assistant", "content": bot_response})
102
 
103
- return conversation_history
 
104
 
105
- def clear_conversation():
106
- conversation_history.clear()
107
- return conversation_history
108
 
109
- # -----------------------------
110
- # CSV Export
111
- # -----------------------------
112
- def export_conversation():
113
- if not conversation_history:
114
- return None
115
- filename = "conversation.csv"
116
- with open(filename, "w", newline="") as f:
117
  writer = csv.writer(f)
118
- writer.writerow(["role", "content"])
119
- for msg in conversation_history:
120
- writer.writerow([msg["role"], msg["content"]])
121
- return filename
122
-
123
- # -----------------------------
124
- # Save staged responses
125
- # -----------------------------
126
- def stage_conversation(category):
127
- if not conversation_history:
128
- return None
129
-
130
- if "staged_responses" not in dataset:
131
- dataset["staged_responses"] = []
132
 
133
- staged_entry = {
134
- "category": category,
135
- "conversation": conversation_history.copy()
136
- }
137
- dataset["staged_responses"].append(staged_entry)
138
 
139
- # Save to file for download
140
- staged_file = "staged_responses.json"
141
- with open(staged_file, "w") as f:
142
- json.dump(dataset, f, indent=2)
143
 
144
- return staged_file
145
 
146
  # -----------------------------
147
  # UI
148
  # -----------------------------
149
  with gr.Blocks() as demo:
150
- gr.Markdown("## Campus Life Chatbot")
151
-
152
- with gr.Row():
153
- category = gr.Dropdown(
154
- choices=list(dataset.keys()),
155
- label="Select Category",
156
- value=list(dataset.keys())[0] if dataset else None
157
- )
158
-
159
- chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
160
-
161
- with gr.Row():
162
- user_input = gr.Textbox(
163
- label="Type your message",
164
- placeholder="Ask about food, housing, professors...",
165
- scale=4
166
- )
167
- send_btn = gr.Button("Send", scale=1)
168
-
169
- with gr.Row():
170
- clear_btn = gr.Button("Clear")
171
- export_btn = gr.Button("Export Conversation to CSV")
172
- stage_btn = gr.Button("Stage Conversation to Category")
173
- download_btn = gr.Button("Download Updated Dataset")
174
-
175
- # Event wiring
176
- send_btn.click(chat, [user_input, category], chatbot)
177
- user_input.submit(chat, [user_input, category], chatbot)
178
- clear_btn.click(clear_conversation, None, chatbot)
179
- export_btn.click(export_conversation, None, gr.File())
180
- stage_btn.click(stage_conversation, category, gr.File())
181
- download_btn.click(lambda: DATA_PATH, None, gr.File())
182
 
183
  if __name__ == "__main__":
184
  demo.launch()
 
1
  import json
2
+ import difflib
3
  import csv
4
+ import os
 
5
  import gradio as gr
6
 
7
  # -----------------------------
 
9
  # -----------------------------
10
  DATA_PATH = "quotes.json"
11
 
12
# Load the quote dataset once at import time. Opening the file directly and
# handling FileNotFoundError avoids the check-then-open race of
# os.path.exists() followed by open(). When the file is missing, start with an
# empty structure that only holds the reserved "staged_responses" bucket.
try:
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        dataset = json.load(f)
except FileNotFoundError:
    dataset = {"staged_responses": []}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
 
 
 
 
 
 
18
 
19
  # -----------------------------
20
+ # Helpers
21
  # -----------------------------
22
def find_best_matches(user_input, category=None, n=3, threshold=0.4):
    """Return up to ``n`` quotes from the dataset that fuzzily match the input.

    Each quote is scored with ``difflib.SequenceMatcher(...).ratio()`` against
    the lower-cased input; quotes scoring below ``threshold`` are dropped.

    When ``category`` names an existing dataset key, only that category is
    searched first; if it yields nothing, every other category is searched.
    When ``category`` is empty or unknown, all categories are searched. The
    reserved ``"staged_responses"`` key is never searched.

    Args:
        user_input: Text typed by the user (``None`` is treated as "").
        category: Optional dataset key to search first.
        n: Maximum number of matches to return.
        threshold: Minimum similarity ratio for a quote to count as a match.

    Returns:
        A list of ``(score, quote_text, category)`` tuples, best score first.
    """
    # Lower-case the input once instead of once per quote.
    needle = (user_input or "").lower()

    def scan(categories):
        """Score every quote in ``categories``; keep those at/above threshold."""
        found = []
        for cat in categories:
            if cat == "staged_responses":
                continue
            for item in dataset.get(cat) or []:
                if not isinstance(item, dict):
                    # Skip malformed entries rather than crashing the search.
                    continue
                text = item.get("quote", "")
                score = difflib.SequenceMatcher(None, needle, text.lower()).ratio()
                if score >= threshold:
                    found.append((score, text, cat))
        return found

    has_category = bool(category) and category in dataset
    matches = scan([category] if has_category else list(dataset))

    if not matches and has_category:
        # The selected category produced nothing, so widen the search. It is
        # left out here because rescanning it cannot yield new matches.
        matches = scan(cat for cat in dataset if cat != category)

    # list.sort is stable, so equal scores keep their scan order.
    matches.sort(key=lambda match: match[0], reverse=True)
    return matches[:n]
54
+
55
+
56
def chatbot_response(message, history, category):
    """Append the user's message and the bot's replies to the conversation.

    The Chatbot component in this file is created with ``type="messages"``, so
    every history entry is a ``{"role": ..., "content": ...}`` dict rather than
    a ``(user, bot)`` tuple.

    Args:
        message: Text typed by the user.
        history: Current conversation as a list of message dicts (or ``None``).
        category: Category selected in the dropdown; may be ``None``.

    Returns:
        A new list holding the previous history plus the new messages. The
        list passed in as ``history`` is not modified.
    """
    # Work on a copy so the caller's list is never mutated.
    conversation = list(history or [])

    if not (message or "").strip():
        conversation.append({"role": "assistant", "content": "Message is empty."})
        return conversation

    best_matches = find_best_matches(message, category)
    if best_matches:
        replies = [f"[{cat}] {quote}" for _, quote, cat in best_matches]
    else:
        replies = [f"No data about {message}."]

    conversation.append({"role": "user", "content": message})
    # One assistant message per matched quote.
    conversation.extend({"role": "assistant", "content": reply} for reply in replies)
    return conversation
 
72
 
 
73
 
74
def stage_response(message, category):
    """Add ``message`` as a new quote under ``category`` in the dataset.

    Args:
        message: Text to store; blank or ``None`` input is rejected.
        category: Dataset key to file the quote under. It is created when it
            does not exist yet. An empty/``None`` value is rejected so the
            quote is not filed under a ``None`` key.

    Returns:
        A human-readable status string describing what happened.
    """
    if not (message or "").strip():
        return "Message is empty."

    if not category:
        # The dropdown has no default selection, so this can legitimately be None.
        return "No category selected."

    dataset.setdefault(category, []).append({"quote": message})
    return f"Message staged to category '{category}'."
84
 
 
 
85
 
86
def download_json():
    """Write the in-memory dataset to a JSON file and return the file's path.

    The result feeds a ``gr.File`` output, which takes a path to a file rather
    than the JSON text itself, so the dataset is written to disk first (the
    same approach ``download_csv`` uses).

    Returns:
        The path of the written file, ``"dataset.json"``.
    """
    json_file = "dataset.json"
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)
    return json_file
88
 
 
 
 
89
 
90
def download_csv():
    """Write every quote in the dataset to a CSV file and return its path.

    The file has a ``Category,Quote`` header followed by one row per quote.
    The reserved ``"staged_responses"`` key is skipped, and an entry without a
    ``"quote"`` field is written with an empty quote column.

    Returns:
        The path of the written file, ``"dataset.csv"``.
    """
    csv_file = "dataset.csv"
    # newline="" lets the csv module control line endings itself.
    with open(csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Category", "Quote"])
        for cat, items in dataset.items():
            if cat == "staged_responses":
                continue
            writer.writerows(
                [cat, item.get("quote", "")]
                for item in items or []
                if isinstance(item, dict)  # ignore malformed entries
            )
    return csv_file
 
 
 
 
 
 
 
101
 
 
 
 
 
 
102
 
103
def clear_history():
    """Return a fresh, empty conversation used to reset the Chatbot component."""
    return []
 
 
105
 
 
106
 
107
  # -----------------------------
108
  # UI
109
  # -----------------------------
110
  with gr.Blocks() as demo:
111
+ gr.Markdown("# 🎓 Campus Experience Chatbot")
112
+
113
+ chatbot = gr.Chatbot(label="Conversation", type="messages")
114
+ msg = gr.Textbox(label="Type your question here...", placeholder="Ask me anything about campus life", lines=2)
115
+ category = gr.Dropdown(choices=[c for c in dataset.keys() if c != "staged_responses"], label="Select Category")
116
+ send = gr.Button("Send")
117
+ stage_btn = gr.Button("Stage conversation to category")
118
+ download_json_btn = gr.Button("Download JSON")
119
+ download_csv_btn = gr.Button("Download CSV")
120
+ clear = gr.Button("Clear Conversation")
121
+
122
+ send.click(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
123
+ msg.submit(chatbot_response, inputs=[msg, chatbot, category], outputs=chatbot)
124
+
125
+ stage_btn.click(stage_response, inputs=[msg, category], outputs=None)
126
+ download_json_btn.click(download_json, outputs=gr.File())
127
+ download_csv_btn.click(download_csv, outputs=gr.File())
128
+ clear.click(clear_history, outputs=chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  if __name__ == "__main__":
131
  demo.launch()