CPU-LLM-Inference

Running

App Files Files Community

R-Kentaren committed about 14 hours ago

Commit

13597bf

verified ·

1 Parent(s): f7b1360

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -91

app.py CHANGED Viewed

@@ -13,6 +13,8 @@ from transformers import AutoTokenizer
 from bs4 import BeautifulSoup
 import requests
 from urllib.parse import quote_plus
 from config import MODELS
 # Global event to signal cancellation from the UI thread to the generation thread
@@ -23,93 +25,189 @@ access_token = os.environ.get('HF_TOKEN', '')
 # Global cache for pipelines to avoid re-loading.
 PIPELINES = {}
-# Base64 encoded simple avatar images (1x1 pixel transparent PNG)
-# These are minimal placeholders - you can replace with actual base64 images
-USER_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
-BOT_AVATAR = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
-def google_search(query, max_results=6, max_chars=50):
-    """
-    Perform Google search without API (scraping).
-    Safe search is turned off.
-    """
     try:
-        # Prepare search URL with safe search off
-        search_url = f"https://www.google.com/search?q={quote_plus(query)}&safe=off&num={max_results}"
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
         response = requests.get(search_url, headers=headers, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
-        # Find search result containers
         results = []
-        search_results = soup.find_all('div', class_='g')
-        for i, result in enumerate(search_results[:max_results]):
             try:
-                # Get title
-                title_elem = result.find('h3')
                 title = title_elem.text if title_elem else "No Title"
-                # Get snippet/description
-                snippet_elem = result.find('div', class_='VwiC3b')
-                if not snippet_elem:
-                    snippet_elem = result.find('div', class_='IsZvec')
                 snippet = snippet_elem.text if snippet_elem else ""
-                # Get link
-                link_elem = result.find('a')
-                link = link_elem.get('href') if link_elem else ""
-                if link and link.startswith('/url?q='):
-                    link = link.split('/url?q=')[1].split('&')[0]
-                # Truncate snippet
                 if len(snippet) > max_chars:
                     snippet = snippet[:max_chars] + "..."
-                results.append({
-                    'title': title,
-                    'snippet': snippet,
-                    'link': link
-                })
-            except Exception as e:
                 continue
-        # Format results
-        formatted_results = []
-        for i, r in enumerate(results):
-            formatted_results.append(f"{i+1}. {r['title']} - {r['snippet']}")
-        return formatted_results
-    except Exception as e:
-        print(f"Google search error: {e}")
         return []
 def retrieve_context(query, max_results=6, max_chars=50):
     """
-    Retrieve search snippets from Google (scraping, no API).
-    Safe search is off.
     Returns a list of result strings.
     """
-    try:
-        results = google_search(query, max_results, max_chars)
-        if results:
-            return results
-        else:
-            # Fallback to DDG if Google fails
-            from ddgs import DDGS
-            with DDGS() as ddgs:
-                return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
-                        for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
-    except Exception as e:
-        print(f"Search error: {e}")
-        return []
 def load_pipeline(model_name):
     """
@@ -201,7 +299,7 @@ def chat_response(user_msg, chat_history, system_prompt,
     debug = ''
     search_results = []
     if enable_search:
-        debug = '🔍 Google search started (safe search: OFF)...'
         thread_search = threading.Thread(
             target=lambda: search_results.extend(
                 retrieve_context(user_msg, int(max_results), int(max_chars))
@@ -216,11 +314,11 @@ def chat_response(user_msg, chat_history, system_prompt,
     if enable_search:
         thread_search.join(timeout=float(search_timeout))
         if search_results:
-            debug = f"✅ Google search completed - Found {len(search_results)} results\n\n" + "\n".join(
                 f"- {r}" for r in search_results
             )
         else:
-            debug = "❌ No web search results found."
     try:
         cur_date = datetime.now().strftime('%Y-%m-%d')
@@ -229,7 +327,7 @@ def chat_response(user_msg, chat_history, system_prompt,
         if search_results:
             enriched = system_prompt.strip() + f"""
 # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
-Below are Google search results. Treat them as the ONLY source of truth for answering.
 {search_results}
 RULES (VERY IMPORTANT):
@@ -367,7 +465,7 @@ def update_duration_estimate(model_name, enable_search, max_results, max_chars,
         model_size = get_model_size(model_name)
         return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
                 f"📊 **Model Size:** {model_size:.1f}B parameters\n"
-                f"🔍 **Web Search:** {'Enabled (Google, SafeSearch: OFF)' if enable_search else 'Disabled'}")
     except Exception as e:
         return f"⚠️ Error calculating estimate: {e}"
@@ -388,26 +486,11 @@ with gr.Blocks(
         .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
         button.primary { font-weight: 600; }
         .gradio-accordion { margin-bottom: 12px; }
-        /* Custom avatar styling */
-        .message-wrap { align-items: flex-start !important; }
-        .avatar-image {
-            border-radius: 50% !important;
-            border: 2px solid #667eea !important;
-            box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
-        }
-        .bot-avatar {
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-            padding: 2px !important;
-        }
-        .user-avatar {
-            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
-            padding: 2px !important;
-        }
     """
 ) as demo:
     # Header
     gr.Markdown("""
-    # 🧠 LLM Inference with Google Search
     """)
     with gr.Row():
@@ -423,9 +506,9 @@ with gr.Blocks(
                     info="Select the language model to use"
                 )
                 search_chk = gr.Checkbox(
-                    label="🔍 Enable Web Search (Google, SafeSearch: OFF)",
                     value=False,
-                    info="Augment responses with real-time web data from Google (no API required)"
                 )
                 sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
@@ -482,7 +565,11 @@ with gr.Blocks(
                     info="Maximum time to wait for search results"
                 )
                 gr.Markdown("""
-                ⚠️ **Note:** Google search uses web scraping (no API required).
                 SafeSearch is **OFF** for comprehensive results.
                 """)
@@ -498,14 +585,12 @@ with gr.Blocks(
                 label="💬 Conversation",
                 show_copy_button=True,
                 avatar_images=(
-                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23f093fb'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E👤%3C/text%3E%3C/svg%3E",  # User avatar
-                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23667eea'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E🤖%3C/text%3E%3C/svg%3E"   # Bot avatar
                 ),
                 bubble_full_width=False,
                 render_markdown=True,
-                sanitize_html=False,
-                elem_id="chatbot",
-                elem_classes="chatbot"
             )
             # Input Area
@@ -544,7 +629,7 @@ with gr.Blocks(
     ---
     💡 **Tips:**
     - Use **Advanced Parameters** to fine-tune creativity and response length
-    - Enable **Web Search** for real-time, up-to-date information from Google
     - SafeSearch is **OFF** for comprehensive results
     - Try different **models** for various tasks (reasoning, coding, general chat)
     - Click the **Copy** button on responses to save them to your clipboard
@@ -560,7 +645,6 @@ with gr.Blocks(
     def submit_and_manage_ui(user_msg, chat_history, *args):
         """
         Orchestrator function that manages UI state and calls the backend chat function.
-        It uses a try...finally block to ensure the UI is always reset.
         """
         if not user_msg.strip():
             yield {}

 from bs4 import BeautifulSoup
 import requests
 from urllib.parse import quote_plus
+import json
+import urllib.parse
 from config import MODELS
 # Global event to signal cancellation from the UI thread to the generation thread
 # Global cache for pipelines to avoid re-loading.
 PIPELINES = {}
+def google_search_web(query, max_results=6, max_chars=50):
+    """Search using Google web scraping with multiple approaches"""
+    # Try multiple User-Agents
+    user_agents = [
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+    ]
+    for user_agent in user_agents:
+        try:
+            # Try different search URLs
+            search_urls = [
+                f"https://www.google.com/search?q={quote_plus(query)}&safe=off&num={max_results}",
+                f"https://www.google.com/search?q={quote_plus(query)}&safe=off&num={max_results}&hl=en",
+                f"https://www.google.com/webhp?safe=off&q={quote_plus(query)}&num={max_results}"
+            ]
+            for search_url in search_urls:
+                try:
+                    headers = {
+                        'User-Agent': user_agent,
+                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                        'Accept-Language': 'en-US,en;q=0.5',
+                        'Accept-Encoding': 'gzip, deflate',
+                        'Connection': 'keep-alive',
+                        'Upgrade-Insecure-Requests': '1',
+                        'Cache-Control': 'max-age=0'
+                    }
+                    response = requests.get(search_url, headers=headers, timeout=15, verify=True)
+                    response.raise_for_status()
+                    soup = BeautifulSoup(response.text, 'html.parser')
+                    # Find search result containers
+                    results = []
+                    # Try different selectors
+                    selectors = [
+                        ('div', 'g'),
+                        ('div', 'tF2Cxc'),
+                        ('div', 'MjjYud'),
+                        ('div', 'yuRUbf')
+                    ]
+                    search_results = []
+                    for tag, class_name in selectors:
+                        search_results = soup.find_all(tag, class_=class_name)
+                        if search_results:
+                            break
+                    if not search_results:
+                        # Try alternative parsing
+                        search_results = soup.find_all('div', class_=re.compile(r'^(g|tF2Cxc|MjjYud|yuRUbf)'))
+                    for result in search_results[:max_results]:
+                        try:
+                            # Get title
+                            title_elem = result.find('h3')
+                            if not title_elem:
+                                title_elem = result.find('h2')
+                            title = title_elem.text if title_elem else "No Title"
+                            # Get snippet
+                            snippet_elem = result.find('div', class_='VwiC3b')
+                            if not snippet_elem:
+                                snippet_elem = result.find('div', class_='IsZvec')
+                            if not snippet_elem:
+                                snippet_elem = result.find('div', class_='lEBKkf')
+                            snippet = snippet_elem.text if snippet_elem else ""
+                            # Get link
+                            link_elem = result.find('a')
+                            link = link_elem.get('href') if link_elem else ""
+                            if link and link.startswith('/url?q='):
+                                link = urllib.parse.unquote(link.split('/url?q=')[1].split('&')[0])
+                            if link and not link.startswith('http'):
+                                continue
+                            # Clean up snippet
+                            snippet = ' '.join(snippet.split())
+                            if len(snippet) > max_chars:
+                                snippet = snippet[:max_chars] + "..."
+                            if title and snippet:
+                                results.append(f"{len(results)+1}. {title} - {snippet}")
+                        except Exception:
+                            continue
+                    if results:
+                        return results
+                except Exception:
+                    continue
+        except Exception:
+            continue
+    return []
+def duckduckgo_search(query, max_results=6, max_chars=50):
+    """Fallback to DuckDuckGo search"""
+    try:
+        from ddgs import DDGS
+        with DDGS() as ddgs:
+            results = []
+            for r in islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results):
+                title = r.get('title', 'No Title')
+                body = r.get('body', '')
+                if len(body) > max_chars:
+                    body = body[:max_chars] + "..."
+                results.append(f"{len(results)+1}. {title} - {body}")
+            return results
+    except Exception:
+        return []
+def bing_search(query, max_results=6, max_chars=50):
+    """Fallback to Bing search"""
     try:
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
         }
+        search_url = f"https://www.bing.com/search?q={quote_plus(query)}&safeSearch=off&count={max_results}"
         response = requests.get(search_url, headers=headers, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         results = []
+        # Find search results
+        search_results = soup.find_all('li', class_='b_algo')
+        for result in search_results[:max_results]:
             try:
+                title_elem = result.find('h2')
                 title = title_elem.text if title_elem else "No Title"
+                snippet_elem = result.find('p')
                 snippet = snippet_elem.text if snippet_elem else ""
                 if len(snippet) > max_chars:
                     snippet = snippet[:max_chars] + "..."
+                if title and snippet:
+                    results.append(f"{len(results)+1}. {title} - {snippet}")
+            except Exception:
                 continue
+        return results
+    except Exception:
         return []
 def retrieve_context(query, max_results=6, max_chars=50):
     """
+    Retrieve search snippets from multiple search engines.
     Returns a list of result strings.
     """
+    # Try Google first
+    results = google_search_web(query, max_results, max_chars)
+    if results:
+        print(f"✅ Google search successful: {len(results)} results")
+        return results
+    # Try DuckDuckGo
+    results = duckduckgo_search(query, max_results, max_chars)
+    if results:
+        print(f"✅ DuckDuckGo search successful: {len(results)} results")
+        return results
+    # Try Bing
+    results = bing_search(query, max_results, max_chars)
+    if results:
+        print(f"✅ Bing search successful: {len(results)} results")
+        return results
+    print("❌ All search engines failed")
+    return []
 def load_pipeline(model_name):
     """
     debug = ''
     search_results = []
     if enable_search:
+        debug = '🔍 Searching (Google → DuckDuckGo → Bing)...'
         thread_search = threading.Thread(
             target=lambda: search_results.extend(
                 retrieve_context(user_msg, int(max_results), int(max_chars))
     if enable_search:
         thread_search.join(timeout=float(search_timeout))
         if search_results:
+            debug = f"✅ Search completed - Found {len(search_results)} results\n\n" + "\n".join(
                 f"- {r}" for r in search_results
             )
         else:
+            debug = "❌ No search results found. Check internet connection or try again."
     try:
         cur_date = datetime.now().strftime('%Y-%m-%d')
         if search_results:
             enriched = system_prompt.strip() + f"""
 # SEARCH CONTEXT (TRUSTED SOURCES ONLY)
+Below are search results. Treat them as the ONLY source of truth for answering.
 {search_results}
 RULES (VERY IMPORTANT):
         model_size = get_model_size(model_name)
         return (f"⏱️ **Estimated GPU Time: {duration:.1f} seconds**\n\n"
                 f"📊 **Model Size:** {model_size:.1f}B parameters\n"
+                f"🔍 **Web Search:** {'Enabled (Multi-Engine)' if enable_search else 'Disabled'}")
     except Exception as e:
         return f"⚠️ Error calculating estimate: {e}"
         .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
         button.primary { font-weight: 600; }
         .gradio-accordion { margin-bottom: 12px; }
     """
 ) as demo:
     # Header
     gr.Markdown("""
+    # 🧠 LLM Inference with Multi-Engine Search
     """)
     with gr.Row():
                     info="Select the language model to use"
                 )
                 search_chk = gr.Checkbox(
+                    label="🔍 Enable Web Search",
                     value=False,
+                    info="Search across Google, DuckDuckGo, and Bing (no API required)"
                 )
                 sys_prompt = gr.Textbox(label="📝 System Prompt", lines=3, value=update_default_prompt(False), placeholder="Define the assistant's behavior and personality...")
                     info="Maximum time to wait for search results"
                 )
                 gr.Markdown("""
+                ⚠️ **Search Engines:**
+                - Google (primary)
+                - DuckDuckGo (fallback)
+                - Bing (fallback)
                 SafeSearch is **OFF** for comprehensive results.
                 """)
                 label="💬 Conversation",
                 show_copy_button=True,
                 avatar_images=(
+                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23f093fb'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E👤%3C/text%3E%3C/svg%3E",
+                    "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='40' height='40'%3E%3Crect width='40' height='40' rx='20' fill='%23667eea'/%3E%3Ctext x='20' y='28' text-anchor='middle' font-size='20' fill='white' font-family='Arial'%3E🤖%3C/text%3E%3C/svg%3E"
                 ),
                 bubble_full_width=False,
                 render_markdown=True,
+                sanitize_html=False
             )
             # Input Area
     ---
     💡 **Tips:**
     - Use **Advanced Parameters** to fine-tune creativity and response length
+    - Enable **Web Search** for real-time information (uses multiple search engines)
     - SafeSearch is **OFF** for comprehensive results
     - Try different **models** for various tasks (reasoning, coding, general chat)
     - Click the **Copy** button on responses to save them to your clipboard
     def submit_and_manage_ui(user_msg, chat_history, *args):
         """
         Orchestrator function that manages UI state and calls the backend chat function.
         """
         if not user_msg.strip():
             yield {}