CPU-LLM-Inference

Running

App Files Files Community

R-Kentaren committed about 21 hours ago

Commit

f7b1360

verified ·

1 Parent(s): 25e7c41

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -25

app.py CHANGED Viewed

@@ -10,8 +10,10 @@ import gradio as gr
 import torch
 from transformers import pipeline, TextIteratorStreamer
 from transformers import AutoTokenizer
-from ddgs import DDGS
-from config import MODELS  # Import from config file
 # Global event to signal cancellation from the UI thread to the generation thread
 cancel_event = threading.Event()
@@ -21,6 +23,94 @@ access_token = os.environ.get('HF_TOKEN', '')
 # Global cache for pipelines to avoid re-loading.
 PIPELINES = {}
 def load_pipeline(model_name):
     """
     Load and cache a transformers pipeline for text generation.
@@ -58,18 +148,6 @@ def load_pipeline(model_name):
     PIPELINES[model_name] = pipe
     return pipe
-def retrieve_context(query, max_results=6, max_chars=50):
-    """
-    Retrieve search snippets from DuckDuckGo (runs in background).
-    Returns a list of result strings.
-    """
-    try:
-        with DDGS() as ddgs:
-            return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
-                    for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
-    except Exception:
-        return []
 def format_conversation(history, system_prompt, tokenizer):
     if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
         messages = [{"role": "system", "content": system_prompt.strip()}] + history
@@ -123,7 +201,7 @@ def chat_response(user_msg, chat_history, system_prompt,
     debug = ''
     search_results = []
     if enable_search:
-        debug = 'Search task started.'
         thread_search = threading.Thread(
             target=lambda: search_results.extend(
                 retrieve_context(user_msg, int(max_results), int(max_chars))
@@ -138,11 +216,11 @@ def chat_response(user_msg, chat_history, system_prompt,
     if enable_search:
         thread_search.join(timeout=float(search_timeout))
         if search_results:
-            debug = "### Search results merged into prompt\n\n" + "\n".join(
                 f"- {r}" for r in search_results
             )
         else:
-            debug = "*No web search results found.*"
     try:
         cur_date = datetime.now().strftime('%Y-%m-%d')
@@ -151,7 +229,7 @@ def chat_response(user_msg, chat_history, system_prompt,
         if search_results:
             enriched = system_prompt.strip() + f"""
 # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
-Below are web search results. Treat them as the ONLY source of truth for answering.
 {search_results}
 RULES (VERY IMPORTANT):
@@ -289,7 +367,7 @@ def update_duration_estimate(model_name, enable_search, max_results, max_chars,
         model_size = get_model_size(model_name)
         return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
                 f"📊 **Model Size:** {model_size:.1f}B parameters\n"
-                f"🔍 **Web Search:** {'Enabled' if enable_search else 'Disabled'}")
     except Exception as e:
         return f"⚠️ Error calculating estimate: {e}"
@@ -310,11 +388,26 @@ with gr.Blocks(
         .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
         button.primary { font-weight: 600; }
         .gradio-accordion { margin-bottom: 12px; }
     """
 ) as demo:
     # Header
     gr.Markdown("""
-    # 🧠 CPU LLM Inference
     """)
     with gr.Row():
@@ -330,9 +423,9 @@ with gr.Blocks(
                     info="Select the language model to use"
                 )
                 search_chk = gr.Checkbox(
-                    label="🔍 Enable Web Search",
                     value=False,
-                    info="Augment responses with real-time web data"
                 )
                 sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
@@ -388,6 +481,10 @@ with gr.Blocks(
                     label="Search Timeout (s)",
                     info="Maximum time to wait for search results"
                 )
             # Actions
             with gr.Row():
@@ -400,8 +497,15 @@ with gr.Blocks(
                 height=600,
                 label="💬 Conversation",
                 show_copy_button=True,
-                avatar_images=(None, "🤖"),
-                bubble_full_width=False
             )
             # Input Area
@@ -440,7 +544,8 @@ with gr.Blocks(
     ---
     💡 **Tips:**
     - Use **Advanced Parameters** to fine-tune creativity and response length
-    - Enable **Web Search** for real-time, up-to-date information
     - Try different **models** for various tasks (reasoning, coding, general chat)
     - Click the **Copy** button on responses to save them to your clipboard
     """, elem_classes="footer")

 import torch
 from transformers import pipeline, TextIteratorStreamer
 from transformers import AutoTokenizer
+from bs4 import BeautifulSoup
+import requests
+from urllib.parse import quote_plus
+from config import MODELS
 # Global event to signal cancellation from the UI thread to the generation thread
 cancel_event = threading.Event()
 # Global cache for pipelines to avoid re-loading.
 PIPELINES = {}
+# Base64-encoded placeholder avatars as PNG data URIs (1x1 pixel transparent PNG).
+# Both constants hold the same minimal image; replace them with real base64
+# images if visible avatars are wanted.
+# NOTE(review): the gr.Chatbot call shown in this diff passes inline SVG data
+# URIs to avatar_images rather than these constants; confirm they are
+# referenced somewhere outside the visible hunks.
+USER_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
+BOT_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
+def google_search(query, max_results=6, max_chars=50):
+    """
+    Scrape Google's HTML results page (no API) and return formatted snippets.
+
+    SafeSearch is disabled through the ``safe=off`` query parameter.
+
+    Args:
+        query: Search text; it is URL-encoded before being sent.
+        max_results: Maximum number of results to request and to return.
+        max_chars: Maximum snippet length; longer snippets are cut to this
+            length and suffixed with "...".
+
+    Returns:
+        A list of strings shaped like "1. <title> - <snippet>", numbered
+        consecutively from 1. The list is empty when the query is blank,
+        ``max_results`` is not positive, the request or parsing fails, or no
+        result containers are found. This function never raises.
+    """
+    try:
+        # A blank query or non-positive limit cannot yield results, so skip the
+        # network round trip. (A negative limit would also make the slice
+        # below silently drop items from the end instead of capping the list.)
+        if not query or not query.strip() or max_results <= 0:
+            return []
+        # Prepare search URL with safe search off
+        search_url = f"https://www.google.com/search?q={quote_plus(query)}&safe=off&num={max_results}"
+        # Browser-like User-Agent sent with the request.
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        response = requests.get(search_url, headers=headers, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        formatted_results = []
+        # Each organic result is expected inside a <div class="g"> container.
+        for result in soup.find_all('div', class_='g')[:max_results]:
+            try:
+                title_elem = result.find('h3')
+                title = title_elem.text if title_elem else "No Title"
+                # Snippet markup: try the 'VwiC3b' class first, then 'IsZvec'.
+                snippet_elem = result.find('div', class_='VwiC3b')
+                if not snippet_elem:
+                    snippet_elem = result.find('div', class_='IsZvec')
+                snippet = snippet_elem.text if snippet_elem else ""
+                if len(snippet) > max_chars:
+                    snippet = snippet[:max_chars] + "..."
+            except Exception as e:
+                # Best effort: one malformed result must not discard the rest,
+                # but report it instead of swallowing it silently.
+                print(f"Google result parse error: {e}")
+                continue
+            # Number by position in the output so gaps left by skipped
+            # results do not show up in the numbering.
+            formatted_results.append(f"{len(formatted_results) + 1}. {title} - {snippet}")
+        return formatted_results
+    except Exception as e:
+        # Deliberately broad: callers treat an empty list as "no results" and
+        # may fall back to another search backend.
+        print(f"Google search error: {e}")
+        return []
+def retrieve_context(query, max_results=6, max_chars=50):
+    """
+    Retrieve search snippets, trying Google first and DuckDuckGo second.
+
+    Google is queried through ``google_search`` (HTML scraping, no API). When
+    that returns an empty list, the query is retried against DuckDuckGo via
+    the ``ddgs`` package. Safe search is off for both backends.
+
+    Args:
+        query: Search text.
+        max_results: Maximum number of results to return.
+        max_chars: Maximum number of snippet characters kept per result.
+
+    Returns:
+        A list of strings shaped like "1. <title> - <snippet>", or an empty
+        list when both backends yield nothing or an error occurs. Errors are
+        printed, not raised.
+    """
+    try:
+        results = google_search(query, max_results, max_chars)
+        if results:
+            return results
+        # Fallback to DuckDuckGo when Google yields an empty list.
+        # Imported locally so the fallback does not rely on module-level
+        # imports that this change may have removed.
+        from itertools import islice
+        from ddgs import DDGS
+        with DDGS() as ddgs:
+            hits = islice(
+                ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"),
+                max_results,
+            )
+            # `or ''` guards against a result whose body is present but None,
+            # which would otherwise raise and discard every fallback result.
+            return [
+                f"{i}. {r.get('title','No Title')} - {(r.get('body') or '')[:max_chars]}"
+                for i, r in enumerate(hits, start=1)
+            ]
+    except Exception as e:
+        # Search is optional enrichment: report the failure and return no
+        # context rather than failing the caller.
+        print(f"Search error: {e}")
+        return []
 def load_pipeline(model_name):
     """
     Load and cache a transformers pipeline for text generation.
     PIPELINES[model_name] = pipe
     return pipe
 def format_conversation(history, system_prompt, tokenizer):
     if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
         messages = [{"role": "system", "content": system_prompt.strip()}] + history
     debug = ''
     search_results = []
     if enable_search:
+        debug = '🔍 Google search started (safe search: OFF)...'
         thread_search = threading.Thread(
             target=lambda: search_results.extend(
                 retrieve_context(user_msg, int(max_results), int(max_chars))
     if enable_search:
         thread_search.join(timeout=float(search_timeout))
         if search_results:
+            debug = f"✅ Google search completed - Found {len(search_results)} results\n\n" + "\n".join(
                 f"- {r}" for r in search_results
             )
         else:
+            debug = "❌ No web search results found."
     try:
         cur_date = datetime.now().strftime('%Y-%m-%d')
         if search_results:
             enriched = system_prompt.strip() + f"""
 # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
+Below are Google search results. Treat them as the ONLY source of truth for answering.
 {search_results}
 RULES (VERY IMPORTANT):
         model_size = get_model_size(model_name)
         return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
                 f"📊 **Model Size:** {model_size:.1f}B parameters\n"
+                f"🔍 **Web Search:** {'Enabled (Google, SafeSearch: OFF)' if enable_search else 'Disabled'}")
     except Exception as e:
         return f"⚠️ Error calculating estimate: {e}"
         .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
         button.primary { font-weight: 600; }
         .gradio-accordion { margin-bottom: 12px; }
+        /* Custom avatar styling */
+        .message-wrap { align-items: flex-start !important; }
+        .avatar-image {
+            border-radius: 50% !important;
+            border: 2px solid #667eea !important;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
+        }
+        .bot-avatar {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+            padding: 2px !important;
+        }
+        .user-avatar {
+            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
+            padding: 2px !important;
+        }
     """
 ) as demo:
     # Header
     gr.Markdown("""
+    # 🧠 LLM Inference with Google Search
     """)
     with gr.Row():
                     info="Select the language model to use"
                 )
                 search_chk = gr.Checkbox(
+                    label="🔍 Enable Web Search (Google, SafeSearch: OFF)",
                     value=False,
+                    info="Augment responses with real-time web data from Google (no API required)"
                 )
                 sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
                     label="Search Timeout (s)",
                     info="Maximum time to wait for search results"
                 )
+                gr.Markdown("""
+                ⚠️ **Note:** Google search uses web scraping (no API required).
+                SafeSearch is **OFF** for comprehensive results.
+                """)
             # Actions
             with gr.Row():
                 height=600,
                 label="💬 Conversation",
                 show_copy_button=True,
+                avatar_images=(
+                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23f093fb'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E👤%3C/text%3E%3C/svg%3E",  # User avatar
+                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23667eea'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E🤖%3C/text%3E%3C/svg%3E"   # Bot avatar
+                ),
+                bubble_full_width=False,
+                render_markdown=True,
+                sanitize_html=False,
+                elem_id="chatbot",
+                elem_classes="chatbot"
             )
             # Input Area
     ---
     💡 **Tips:**
     - Use **Advanced Parameters** to fine-tune creativity and response length
+    - Enable **Web Search** for real-time, up-to-date information from Google
+    - SafeSearch is **OFF** for comprehensive results
     - Try different **models** for various tasks (reasoning, coding, general chat)
     - Click the **Copy** button on responses to save them to your clipboard
     """, elem_classes="footer")