igortech commited on
Commit
35eb385
·
verified ·
1 Parent(s): 92c739f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -95
app.py CHANGED
@@ -3,136 +3,194 @@ import os
3
  import re
4
  import csv
5
  import tempfile
6
- import datetime
7
  from difflib import SequenceMatcher
 
8
  import gradio as gr
 
9
 
10
  # -----------------------------
11
  # Config / data loading
12
  # -----------------------------
13
  DATA_PATH = "quotes.json"
14
 
15
-
16
- def load_dataset():
17
  if os.path.exists(DATA_PATH):
18
- with open(DATA_PATH, "r", encoding="utf-8") as f:
19
- return json.load(f)
20
- return {"staged_responses": []}
21
-
22
-
23
- def save_dataset(data):
24
- with open(DATA_PATH, "w", encoding="utf-8") as f:
25
- json.dump(data, f, indent=2, ensure_ascii=False)
26
-
27
-
28
- dataset = load_dataset()
 
29
 
30
  # -----------------------------
31
- # Core logic
32
  # -----------------------------
33
- def find_best_matches(user_input, dataset, top_n=3, threshold=0.3):
34
- matches = []
35
- for category, quotes in dataset.items():
36
- if category == "staged_responses":
37
- continue
38
- for entry in quotes:
39
- quote = entry["quote"]
40
- score = SequenceMatcher(None, user_input.lower(), quote.lower()).ratio()
41
- matches.append((score, category, quote))
42
- matches.sort(key=lambda x: x[0], reverse=True)
43
- return [m for m in matches if m[0] >= threshold][:top_n]
44
-
45
-
46
- def generate_response(message, history):
47
- matches = find_best_matches(message, dataset)
48
-
49
- if not matches:
50
- return (
51
- history
52
- + [{"role": "assistant", "content": f"No data about {message}."}]
53
- )
54
 
55
- responses = []
56
- for score, category, quote in matches:
57
- responses.append(f"Category: {category}\nWhat real people say:\n{quote}")
58
 
59
- reply = "\n\n".join(responses)
60
- return history + [{"role": "assistant", "content": reply}]
61
 
 
 
62
 
63
- # -----------------------------
64
- # Conversation & staging
65
- # -----------------------------
66
- def stage_conversation(history, category):
67
- if not history:
68
- return "No conversation to stage."
69
-
70
- convo_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in history])
71
 
72
- new_entry = {"quote": convo_text}
 
 
 
 
 
 
 
 
73
 
74
- if "staged_responses" not in dataset:
75
- dataset["staged_responses"] = []
76
- dataset["staged_responses"].append(new_entry)
77
-
78
- save_dataset(dataset)
79
- return f"Conversation staged under {category}."
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # -----------------------------
83
- # Download helpers
84
  # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def download_conversation_csv(history):
86
  if not history:
87
- return None
88
-
89
- tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8")
90
- writer = csv.writer(tmpfile)
91
- writer.writerow(["role", "content"])
92
- for msg in history:
93
- writer.writerow([msg["role"], msg["content"]])
94
- tmpfile.close()
95
- return tmpfile.name
96
-
97
-
98
- def download_dataset():
99
- tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
100
- json.dump(dataset, tmpfile, indent=2, ensure_ascii=False)
101
- tmpfile.close()
102
- return tmpfile.name
103
-
104
 
105
  # -----------------------------
106
- # Gradio UI
107
  # -----------------------------
108
  with gr.Blocks() as demo:
109
- gr.Markdown("# Campus Conversation Bot")
110
 
111
- chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
112
- msg = gr.Textbox(label="Type your question", placeholder="Ask me something...", container=True)
113
 
114
  with gr.Row():
115
- clear_btn = gr.Button("Clear")
116
- export_csv_btn = gr.Button("Export Conversation to CSV")
117
- download_json_btn = gr.Button("Download Current Dataset")
118
-
119
- with gr.Row():
120
- category_dropdown = gr.Dropdown(choices=list(dataset.keys()), label="Choose category to stage", interactive=True)
121
- stage_btn = gr.Button("Stage Conversation to Category")
122
 
123
- # Events
124
- msg.submit(generate_response, [msg, chatbot], chatbot)
125
- msg.submit(lambda: "", None, msg) # clear textbox on Enter
126
- clear_btn.click(lambda: [], None, chatbot)
127
 
128
- export_csv_file = gr.File(label="Download Conversation CSV")
129
- export_csv_btn.click(download_conversation_csv, chatbot, export_csv_file)
 
 
 
130
 
131
- download_json_file = gr.File(label="Download Dataset JSON")
132
- download_json_btn.click(download_dataset, None, download_json_file)
 
 
133
 
134
- stage_btn.click(stage_conversation, [chatbot, category_dropdown], None)
 
 
135
 
 
 
 
 
 
 
 
136
 
137
  if __name__ == "__main__":
138
- demo.launch()
 
3
  import re
4
  import csv
5
  import tempfile
 
6
  from difflib import SequenceMatcher
7
+ import datetime
8
  import gradio as gr
9
+ from rapidfuzz import fuzz
10
 
11
# -----------------------------
# Config / data loading
# -----------------------------
DATA_PATH = "quotes.json"


def load_quotes():
    """Load the quotes dataset from DATA_PATH.

    Returns the parsed JSON object when it is a dict (category name ->
    list of {"quote": ...} entries); returns {} when the file is missing,
    unreadable, or has the wrong shape.
    """
    if os.path.exists(DATA_PATH):
        try:
            with open(DATA_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict):
                print(f"Loaded dataset from {DATA_PATH} with {len(data)} categories.")
                return data
            # Previously a non-dict root fell through to the "no file" message,
            # which was misleading -- report the actual problem instead.
            print(f"Ignoring {DATA_PATH}: JSON root must be an object, got {type(data).__name__}.")
        except Exception as e:  # best-effort load; the app still starts with empty data
            print(f"Failed to load {DATA_PATH}: {e}")
    else:
        print("No dataset file found. Upload one via the UI.")
    return {}


QUOTES = load_quotes()
30
 
31
# -----------------------------
# Text helpers
# -----------------------------

# Common English function words dropped during tokenization.
STOPWORDS = {
    "the","a","an","and","or","but","if","then","so","than","to","of","in","on","at","for",
    "is","are","was","were","be","being","been","it","that","this","these","those","with",
    "as","by","from","about","into","over","after","before","up","down","out",
}

# Keyword cues used by infer_sentiment() to guess the polarity of a question.
POS_HINTS = {"good","great","love","like","enjoy","awesome","amazing","nice","positive","best","fantastic","excellent"}
NEG_HINTS = {"bad","hate","dislike","worst","awful","terrible","negative","poor","meh","gross","unsafe","hard","difficult"}

# Matches any single ASCII punctuation character.  Built with plain string
# concatenation instead of an f-string: the original f-string embedded a
# double quote and backslashes inside the replacement field, which is a
# SyntaxError on Python < 3.12.
punct_re = re.compile("[" + re.escape('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') + "]")


def normalize(text: str) -> str:
    """Lowercase *text* (None-safe) and replace each punctuation char with a space."""
    return punct_re.sub(" ", (text or "").lower())


def tokenize(text: str):
    """Split normalized *text* into words, dropping stopwords."""
    return [t for t in normalize(text).split() if t and t not in STOPWORDS]
 
 
 
 
 
 
50
 
51
def infer_sentiment(user_text: str) -> str:
    """Guess whether *user_text* asks about something positive or negative.

    Cue words are matched as whole tokens.  The original used substring
    search (`w in tl`), so e.g. "hard" matched "hardware" and "bad" matched
    "badminton", skewing the guess.  Returns "positive" when there is no
    signal or the signal is mixed (same fallback as before).
    """
    tokens = set(normalize(user_text).split())
    has_pos = bool(tokens & POS_HINTS)
    has_neg = bool(tokens & NEG_HINTS)
    if has_pos and not has_neg:
        return "positive"
    if has_neg and not has_pos:
        return "negative"
    return "positive"
60
 
61
# -----------------------------
# Retrieval
# -----------------------------
def best_match_quote(user_text: str) -> str:
    """Fuzzy-search every category in QUOTES and return the best-scoring quote.

    Falls back to a "No data ..." message when nothing reaches the threshold.
    """
    needle = user_text.lower()
    best_score, best_quote = 0, None

    for quotes in QUOTES.values():
        for entry in quotes:
            candidate = entry.get("quote", "")
            score = fuzz.token_set_ratio(needle, candidate.lower())
            if score > best_score:
                best_score, best_quote = score, candidate

    # Anything below 30 is treated as "we don't know".
    if best_score < 30:
        return f"No data about '{user_text}'"
    return best_quote
80
 
81
# -----------------------------
# 3-fold response generation
# -----------------------------
def generate_three_fold_response(user_text: str):
    """Return [summary, details, rest] derived from the best-matching quote.

    The quote is split naively on '.'; slot 1 gets the first sentence,
    slot 2 the next two, slot 3 everything after.  A "No data ..." result
    from retrieval is passed through in slot 1 with empty slots 2-3.
    """
    quote = best_match_quote(user_text)
    if quote.startswith("No data"):
        # NOTE(review): sentinel-by-prefix is fragile -- a genuine quote that
        # starts with "No data" would be misrouted here; confirm acceptable.
        return [quote, "", ""]

    # Naive sentence split; empty fragments (e.g. from "..") are dropped.
    sentences = [part.strip() for part in quote.split(".") if part.strip()]
    # " ".join of an empty slice is "", so no explicit length guards are needed.
    return [
        sentences[0] if sentences else "",
        " ".join(sentences[1:3]),
        " ".join(sentences[3:]),
    ]
95
 
96
# -----------------------------
# Gradio callbacks
# -----------------------------
def respond(message, history, category):
    """Chat handler: clear the textbox and append one (user, bot) turn.

    NOTE(review): the Chatbot is created with type="messages", but this
    appends a (message, [label/text dicts]) tuple, which is not the
    messages schema and may not render -- confirm the intended history
    format.  *category* is currently unused by this handler.
    """
    if not message:
        return "", history

    summary, details, real_people = generate_three_fold_response(message)
    bot_turn = [
        {"label": "Summary", "text": summary},
        {"label": "Details", "text": details},
        {"label": "What real people say", "text": real_people},
    ]
    history.append((message, bot_turn))
    return "", history
110
+
111
def clear_chat():
    """Reset the Chatbot component (Gradio clears it when handed None)."""
    return None
113
+
114
def upload_json(filepath):
    """Replace the in-memory dataset with the uploaded JSON file.

    Returns (status, dropdown update).  On success the category dropdown is
    repopulated and DATA_PATH is rebound to the file's basename; on failure
    the dropdown choices are cleared and the dataset is left untouched.
    """
    global QUOTES, DATA_PATH
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return f"Error loading file: {e}", gr.update(choices=[])

    if not isinstance(data, dict):
        return gr.update(value="Upload failed: JSON root must be an object."), gr.update(choices=[])

    QUOTES = data
    DATA_PATH = os.path.basename(filepath)
    cats = sorted(QUOTES.keys())
    first = cats[0] if cats else None
    return f"Loaded {len(cats)} categories from {DATA_PATH}.", gr.update(choices=cats, value=first)
128
+
129
def download_current_json():
    """Serialize the in-memory QUOTES dataset to a file and return its path.

    Writes to a NamedTemporaryFile (matching download_conversation_csv)
    instead of the previous behavior of writing DATA_PATH into the working
    directory, which silently clobbered the on-disk dataset file and failed
    outright on read-only deployments.
    """
    tmp = tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    )
    with tmp:  # context manager closes the handle even if json.dump raises
        json.dump(QUOTES, tmp, indent=2, ensure_ascii=False)
    return tmp.name
134
+
135
def download_conversation_csv(history):
    """Export the chat history to a CSV file and return its path.

    Each history entry is expected to be (user_message, [3 label/text dicts])
    as produced by respond().  Fixes over the original:
    - an empty history now still yields the header row (previously a
      completely empty file was returned);
    - short or key-less bot entries are padded with "" instead of raising
      IndexError/KeyError.
    """
    tmp = tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    )
    with tmp:
        writer = csv.writer(tmp)
        writer.writerow(["User Message", "Summary", "Details", "What real people say"])
        for msg, bot_resp in history or []:
            # Pad to exactly three text columns, tolerating malformed entries.
            parts = [
                (bot_resp[i].get("text", "") if i < len(bot_resp) else "")
                for i in range(3)
            ]
            writer.writerow([msg] + parts)
    return tmp.name
 
 
 
 
150
 
151
# -----------------------------
# UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🎓 College Life Chatbot — Category-Aware, 3-Fold Responses")

    # Category list from the dataset loaded at startup (may be empty until
    # the user uploads a JSON file via the uploader below).
    initial_categories = sorted(list(QUOTES.keys()))

    with gr.Row():
        category = gr.Dropdown(
            label="Category",
            choices=initial_categories,
            value=(initial_categories[0] if initial_categories else None)
        )

    # NOTE(review): type="messages" expects {"role","content"} dicts, but
    # respond() appends (message, [label/text dicts]) tuples -- confirm the
    # history schema actually renders with this Gradio version.
    chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
    msg = gr.Textbox(label="Your message", placeholder="Ask something like: 'Is food good in college?'", autofocus=True)
    send = gr.Button("Send")
    clear = gr.Button("Clear")

    with gr.Row():
        uploader = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
        upload_status = gr.Textbox(label="Upload status", interactive=False)
        # NOTE(review): these two are gr.File components used as if they were
        # buttons -- see the event wiring below.
        download_json_btn = gr.File(label="Download dataset")
        download_csv_btn = gr.File(label="Export conversation to CSV")

    # Wire events: Enter in the textbox and the Send button both submit;
    # both clear the textbox and update the chat history.
    msg.submit(respond, [msg, chatbot, category], [msg, chatbot])
    send.click(respond, [msg, chatbot, category], [msg, chatbot])
    clear.click(clear_chat, None, chatbot, queue=False)

    # Upload repopulates the category dropdown and reports a status line.
    uploader.upload(upload_json, uploader, [upload_status, category])
    # NOTE(review): gr.File's `download` event fires AFTER a user downloads a
    # file and here has no outputs, so download_current_json's return value
    # goes nowhere; and gr.File has no `click` event in current Gradio -- both
    # wirings likely need a gr.Button + gr.File-output pattern. Verify against
    # the installed Gradio version.
    download_json_btn.download(download_current_json)
    download_csv_btn.click(download_conversation_csv, chatbot, download_csv_btn)

# -----------------------------
# Startup log
# -----------------------------
print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
if QUOTES:
    # One line per category so the deployment log shows what was loaded.
    for cat, qlist in QUOTES.items():
        print(f" - {cat}: {len(qlist)} entries")
194
 
195
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the standard Hugging Face Spaces port).
    demo.launch(server_name="0.0.0.0", server_port=7860)