R-Kentaren committed on
Commit
f7b1360
·
verified ·
1 Parent(s): 25e7c41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -25
app.py CHANGED
@@ -10,8 +10,10 @@ import gradio as gr
10
  import torch
11
  from transformers import pipeline, TextIteratorStreamer
12
  from transformers import AutoTokenizer
13
- from ddgs import DDGS
14
- from config import MODELS # Import from config file
 
 
15
 
16
  # Global event to signal cancellation from the UI thread to the generation thread
17
  cancel_event = threading.Event()
@@ -21,6 +23,94 @@ access_token = os.environ.get('HF_TOKEN', '')
21
  # Global cache for pipelines to avoid re-loading.
22
  PIPELINES = {}
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def load_pipeline(model_name):
25
  """
26
  Load and cache a transformers pipeline for text generation.
@@ -58,18 +148,6 @@ def load_pipeline(model_name):
58
  PIPELINES[model_name] = pipe
59
  return pipe
60
 
61
- def retrieve_context(query, max_results=6, max_chars=50):
62
- """
63
- Retrieve search snippets from DuckDuckGo (runs in background).
64
- Returns a list of result strings.
65
- """
66
- try:
67
- with DDGS() as ddgs:
68
- return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
69
- for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
70
- except Exception:
71
- return []
72
-
73
  def format_conversation(history, system_prompt, tokenizer):
74
  if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
75
  messages = [{"role": "system", "content": system_prompt.strip()}] + history
@@ -123,7 +201,7 @@ def chat_response(user_msg, chat_history, system_prompt,
123
  debug = ''
124
  search_results = []
125
  if enable_search:
126
- debug = 'Search task started.'
127
  thread_search = threading.Thread(
128
  target=lambda: search_results.extend(
129
  retrieve_context(user_msg, int(max_results), int(max_chars))
@@ -138,11 +216,11 @@ def chat_response(user_msg, chat_history, system_prompt,
138
  if enable_search:
139
  thread_search.join(timeout=float(search_timeout))
140
  if search_results:
141
- debug = "### Search results merged into prompt\n\n" + "\n".join(
142
  f"- {r}" for r in search_results
143
  )
144
  else:
145
- debug = "*No web search results found.*"
146
 
147
  try:
148
  cur_date = datetime.now().strftime('%Y-%m-%d')
@@ -151,7 +229,7 @@ def chat_response(user_msg, chat_history, system_prompt,
151
  if search_results:
152
  enriched = system_prompt.strip() + f"""
153
  # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
154
- Below are web search results. Treat them as the ONLY source of truth for answering.
155
  {search_results}
156
 
157
  RULES (VERY IMPORTANT):
@@ -289,7 +367,7 @@ def update_duration_estimate(model_name, enable_search, max_results, max_chars,
289
  model_size = get_model_size(model_name)
290
  return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
291
  f"📊 **Model Size:** {model_size:.1f}B parameters\n"
292
- f"🔍 **Web Search:** {'Enabled' if enable_search else 'Disabled'}")
293
  except Exception as e:
294
  return f"⚠️ Error calculating estimate: {e}"
295
 
@@ -310,11 +388,26 @@ with gr.Blocks(
310
  .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
311
  button.primary { font-weight: 600; }
312
  .gradio-accordion { margin-bottom: 12px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  """
314
  ) as demo:
315
  # Header
316
  gr.Markdown("""
317
- # 🧠 CPU LLM Inference
318
  """)
319
 
320
  with gr.Row():
@@ -330,9 +423,9 @@ with gr.Blocks(
330
  info="Select the language model to use"
331
  )
332
  search_chk = gr.Checkbox(
333
- label="🔍 Enable Web Search",
334
  value=False,
335
- info="Augment responses with real-time web data"
336
  )
337
  sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
338
 
@@ -388,6 +481,10 @@ with gr.Blocks(
388
  label="Search Timeout (s)",
389
  info="Maximum time to wait for search results"
390
  )
 
 
 
 
391
 
392
  # Actions
393
  with gr.Row():
@@ -400,8 +497,15 @@ with gr.Blocks(
400
  height=600,
401
  label="💬 Conversation",
402
  show_copy_button=True,
403
- avatar_images=(None, "🤖"),
404
- bubble_full_width=False
 
 
 
 
 
 
 
405
  )
406
 
407
  # Input Area
@@ -440,7 +544,8 @@ with gr.Blocks(
440
  ---
441
  💡 **Tips:**
442
  - Use **Advanced Parameters** to fine-tune creativity and response length
443
- - Enable **Web Search** for real-time, up-to-date information
 
444
  - Try different **models** for various tasks (reasoning, coding, general chat)
445
  - Click the **Copy** button on responses to save them to your clipboard
446
  """, elem_classes="footer")
 
10
  import torch
11
  from transformers import pipeline, TextIteratorStreamer
12
  from transformers import AutoTokenizer
13
+ from bs4 import BeautifulSoup
14
+ import requests
15
+ from urllib.parse import quote_plus
16
+ from config import MODELS
17
 
18
  # Global event to signal cancellation from the UI thread to the generation thread
19
  cancel_event = threading.Event()
 
23
  # Global cache for pipelines to avoid re-loading.
24
  PIPELINES = {}
25
 
26
+ # Base64 encoded simple avatar images (1x1 pixel transparent PNG)
27
+ # These are minimal placeholders - you can replace with actual base64 images
28
+ USER_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
29
+ BOT_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
30
+
31
def google_search(query, max_results=6, max_chars=50):
    """
    Scrape Google's HTML results page for ``query`` (no API key involved).

    The request is sent with ``safe=off``, i.e. SafeSearch filtering is
    disabled.

    Args:
        query: Search text. Empty or whitespace-only text yields ``[]``
            without issuing a request.
        max_results: Upper bound on returned entries; values below 1
            yield ``[]``.
        max_chars: Snippets longer than this are cut to ``max_chars``
            characters and suffixed with ``...``.

    Returns:
        A list of strings formatted as ``"<n>. <title> - <snippet>"``,
        numbered from 1. Any request or parsing failure is printed and
        results in ``[]``; this function does not raise.
    """
    # Nothing to search for / nothing requested: skip the network round trip.
    # The bound check also keeps a negative max_results from turning the
    # slice below into "everything except the last n".
    if not query or not query.strip() or max_results < 1:
        return []

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Let requests build and percent-encode the query string.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query, "safe": "off", "num": max_results},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        formatted_results = []
        # Each <div class="g"> is treated as one result container.
        for result in soup.find_all('div', class_='g')[:max_results]:
            title_elem = result.find('h3')
            title = title_elem.text if title_elem else "No Title"

            # Two snippet class names are tried, in this order.
            snippet_elem = result.find('div', class_='VwiC3b')
            if snippet_elem is None:
                snippet_elem = result.find('div', class_='IsZvec')
            snippet = snippet_elem.text if snippet_elem else ""

            if len(snippet) > max_chars:
                snippet = snippet[:max_chars] + "..."

            formatted_results.append(
                f"{len(formatted_results) + 1}. {title} - {snippet}"
            )

        return formatted_results

    except Exception as exc:
        # Deliberate best-effort boundary: report the problem and hand the
        # caller an empty result instead of propagating the error.
        print(f"Google search error: {exc}")
        return []
93
+
94
def retrieve_context(query, max_results=6, max_chars=50):
    """
    Collect numbered search snippets for ``query``.

    ``google_search`` is tried first. When it returns nothing, the
    DuckDuckGo client from the ``ddgs`` package is queried instead
    (worldwide region, safe search off, results limited to the past year).
    Any exception is printed and turned into an empty list.

    Returns a list of strings shaped like ``"<n>. <title> - <body>"``.
    """
    try:
        google_hits = google_search(query, max_results, max_chars)
        if google_hits:
            return google_hits

        # Imported here, so ddgs is only loaded when the fallback path runs.
        from ddgs import DDGS

        with DDGS() as client:
            hits = islice(
                client.text(query, region="wt-wt", safesearch="off", timelimit="y"),
                max_results,
            )
            snippets = []
            for position, hit in enumerate(hits, start=1):
                title = hit.get('title', 'No Title')
                body = hit.get('body', '')[:max_chars]
                snippets.append(f"{position}. {title} - {body}")
            return snippets
    except Exception as err:
        print(f"Search error: {err}")
        return []
113
+
114
  def load_pipeline(model_name):
115
  """
116
  Load and cache a transformers pipeline for text generation.
 
148
  PIPELINES[model_name] = pipe
149
  return pipe
150
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def format_conversation(history, system_prompt, tokenizer):
152
  if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
153
  messages = [{"role": "system", "content": system_prompt.strip()}] + history
 
201
  debug = ''
202
  search_results = []
203
  if enable_search:
204
+ debug = '🔍 Google search started (safe search: OFF)...'
205
  thread_search = threading.Thread(
206
  target=lambda: search_results.extend(
207
  retrieve_context(user_msg, int(max_results), int(max_chars))
 
216
  if enable_search:
217
  thread_search.join(timeout=float(search_timeout))
218
  if search_results:
219
+ debug = f"✅ Google search completed - Found {len(search_results)} results\n\n" + "\n".join(
220
  f"- {r}" for r in search_results
221
  )
222
  else:
223
+ debug = "❌ No web search results found."
224
 
225
  try:
226
  cur_date = datetime.now().strftime('%Y-%m-%d')
 
229
  if search_results:
230
  enriched = system_prompt.strip() + f"""
231
  # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
232
+ Below are Google search results. Treat them as the ONLY source of truth for answering.
233
  {search_results}
234
 
235
  RULES (VERY IMPORTANT):
 
367
  model_size = get_model_size(model_name)
368
  return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
369
  f"📊 **Model Size:** {model_size:.1f}B parameters\n"
370
+ f"🔍 **Web Search:** {'Enabled (Google, SafeSearch: OFF)' if enable_search else 'Disabled'}")
371
  except Exception as e:
372
  return f"⚠️ Error calculating estimate: {e}"
373
 
 
388
  .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
389
  button.primary { font-weight: 600; }
390
  .gradio-accordion { margin-bottom: 12px; }
391
+ /* Custom avatar styling */
392
+ .message-wrap { align-items: flex-start !important; }
393
+ .avatar-image {
394
+ border-radius: 50% !important;
395
+ border: 2px solid #667eea !important;
396
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
397
+ }
398
+ .bot-avatar {
399
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
400
+ padding: 2px !important;
401
+ }
402
+ .user-avatar {
403
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
404
+ padding: 2px !important;
405
+ }
406
  """
407
  ) as demo:
408
  # Header
409
  gr.Markdown("""
410
+ # 🧠 LLM Inference with Google Search
411
  """)
412
 
413
  with gr.Row():
 
423
  info="Select the language model to use"
424
  )
425
  search_chk = gr.Checkbox(
426
+ label="🔍 Enable Web Search (Google, SafeSearch: OFF)",
427
  value=False,
428
+ info="Augment responses with real-time web data from Google (no API required)"
429
  )
430
  sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
431
 
 
481
  label="Search Timeout (s)",
482
  info="Maximum time to wait for search results"
483
  )
484
+ gr.Markdown("""
485
+ ⚠️ **Note:** Google search uses web scraping (no API required).
486
+ SafeSearch is **OFF** for comprehensive results.
487
+ """)
488
 
489
  # Actions
490
  with gr.Row():
 
497
  height=600,
498
  label="💬 Conversation",
499
  show_copy_button=True,
500
+ avatar_images=(
501
+ "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23f093fb'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E👤%3C/text%3E%3C/svg%3E", # User avatar
502
+ "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23667eea'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E🤖%3C/text%3E%3C/svg%3E" # Bot avatar
503
+ ),
504
+ bubble_full_width=False,
505
+ render_markdown=True,
506
+ sanitize_html=False,
507
+ elem_id="chatbot",
508
+ elem_classes="chatbot"
509
  )
510
 
511
  # Input Area
 
544
  ---
545
  💡 **Tips:**
546
  - Use **Advanced Parameters** to fine-tune creativity and response length
547
+ - Enable **Web Search** for real-time, up-to-date information from Google
548
+ - SafeSearch is **OFF** for comprehensive results
549
  - Try different **models** for various tasks (reasoning, coding, general chat)
550
  - Click the **Copy** button on responses to save them to your clipboard
551
  """, elem_classes="footer")