"""AI research agent: Bing search -> page crawl -> LLM answer.

Pipeline:
  1. Playwright queries Bing and collects the top result URLs.
  2. Crawl4AI fetches each URL and extracts markdown text.
  3. The combined text is sent to a hosted GLM-4.5 Gradio Space,
     which answers the user's question citing the crawled sources.
"""

import asyncio
from urllib.parse import quote_plus

import gradio as gr
import nest_asyncio
from crawl4ai import AsyncWebCrawler
from gradio_client import Client
from playwright.async_api import async_playwright

# 1. Apply Asyncio Fix — allows re-entrant event loops (Playwright/Crawl4AI
# coroutines awaited from inside Gradio's own running loop).
nest_asyncio.apply()

# --- CONFIGURATION ---
AI_CLIENT_URL = "zai-org/GLM-4.5-Space"


async def search_bing_for_links(query):
    """
    Uses Playwright to search Bing and extract the top 3 result URLs.
    This avoids Google's 429 rate limits.

    Args:
        query: Free-text search query typed by the user.

    Returns:
        list[str]: Up to 3 absolute http(s) URLs; empty list on any failure.
    """
    print(f"🕵️ Searching Bing for: {query}")
    links = []
    async with async_playwright() as p:
        # Launch browser with arguments to look like a real user
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            ],
        )
        page = await browser.new_page()
        try:
            # Go to Bing. URL-encode the query so spaces and reserved
            # characters (&, #, ?, non-ASCII) don't corrupt the URL.
            await page.goto(
                f"https://www.bing.com/search?q={quote_plus(query)}",
                timeout=10000,
            )
            await page.wait_for_selector("li.b_algo h2 a", timeout=5000)
            # Extract hrefs from the search results.
            # Bing results usually have the class 'b_algo'.
            results = await page.locator("li.b_algo h2 a").all()
            for res in results[:3]:  # Get top 3
                url = await res.get_attribute("href")
                # Skip javascript:/relative hrefs and ad redirects
                # that don't start with http.
                if url and url.startswith("http"):
                    links.append(url)
        except Exception as e:
            # Best-effort: log and fall through so the caller gets [].
            print(f"Bing Search Error: {e}")
        finally:
            await browser.close()
    return links


async def get_web_research(query):
    """
    1. Get URLs from Bing.
    2. Crawl them with Crawl4AI.

    Args:
        query: The research topic to look up.

    Returns:
        str: Concatenated markdown excerpts tagged with their source URL,
        or a human-readable explanation string when nothing was found.
    """
    # Step 1: Get URLs
    urls = await search_bing_for_links(query)
    if not urls:
        return "No links found on Bing. The server might be blocked or the query returned no results."
    print(f"🔗 Found URLs: {urls}")

    combined_content = ""
    # Step 2: Crawl URLs
    async with AsyncWebCrawler(verbose=True) as crawler:
        for url in urls:
            try:
                print(f"🕷️ Crawling: {url}")
                result = await crawler.arun(url=url)
                if result.markdown:
                    combined_content += f"\n\n=== SOURCE: {url} ===\n"
                    # Limit content to 2500 chars per page to keep AI input manageable
                    combined_content += result.markdown[:2500]
            except Exception as e:
                # One bad page shouldn't abort the whole research pass.
                print(f"Failed to crawl {url}: {e}")
                continue

    if not combined_content:
        return "Found Bing links, but could not extract text (sites might be protected)."
    return combined_content


def clean_api_response(response):
    """
    Parses the complex list/dict response from the GLM-4.5 Space.

    Args:
        response: Raw value returned by ``Client.predict`` — may be a list
            of chat messages, a tuple of gradio outputs, or a plain value.

    Returns:
        str: The assistant's final message text, or an error description
        that embeds the raw payload for debugging.
    """
    try:
        # Case 1: Response is a list of messages (standard Chat interface)
        if isinstance(response, list):
            # Get the content of the last message
            last_msg = response[-1]
            if isinstance(last_msg, dict) and 'content' in last_msg:
                return last_msg['content']
            return str(last_msg)
        # Case 2: Response is a tuple (gradio return)
        if isinstance(response, tuple):
            return str(response[-1])
        return str(response)
    except Exception as e:
        return f"Error parsing AI response: {str(e)} | Raw: {str(response)}"


async def ask_ai(user_query):
    """
    Gradio event handler: streams status updates, then the final answer.

    Yields interim status strings so the UI shows progress while the
    search/crawl/LLM steps run.

    Args:
        user_query: Topic typed into the textbox.

    Yields:
        str: Status messages followed by the AI's answer (markdown).
    """
    if not user_query:
        yield "Please type a topic."
        return

    yield "🔍 Searching Bing & Crawling sites..."

    # 1. Perform Research
    context_data = await get_web_research(user_query)

    # 2. Construct Prompt
    system_prompt = (
        "You are a helpful research assistant. "
        "I have performed a Bing Search and scraped the top results. "
        "The content is provided below. "
        "Answer the user's question based strictly on this data. "
        "Cite the sources (URLs) used."
    )
    full_message = f"User Query: {user_query}\n\nWeb Search Data:\n{context_data}"

    yield "🧠 Analyzing content and generating answer..."

    # 3. Call AI Client
    try:
        client = Client(AI_CLIENT_URL)
        raw_result = client.predict(
            msg=full_message,
            sys_prompt=system_prompt,
            thinking_enabled=True,
            temperature=0.6,
            api_name="/chat_wrapper",
        )
        # 4. Clean response
        final_answer = clean_api_response(raw_result)
        yield final_answer
    except Exception as e:
        yield f"AI Connection Error: {str(e)}"


# --- UI SETUP ---
custom_css = """
#component-0 {max-width: 900px; margin: auto;}
"""

with gr.Blocks(css=custom_css, title="AI Bing Agent") as demo:
    gr.Markdown("# 🕷️ AI Research Agent (Bing + Crawl4AI)")
    gr.Markdown("Type a topic. The agent searches Bing, reads the pages, and answers.")

    with gr.Row():
        query_input = gr.Textbox(
            label="Research Topic",
            placeholder="e.g. Release date of Avengers Doomsday",
            scale=4,
        )
        submit_btn = gr.Button("Deep Search", variant="primary", scale=1)

    output_display = gr.Markdown(label="Research Result")

    # Event triggers — both the button and pressing Enter run the agent.
    submit_btn.click(fn=ask_ai, inputs=query_input, outputs=output_display)
    query_input.submit(fn=ask_ai, inputs=query_input, outputs=output_display)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)