# NOTE: "Spaces: Sleeping" status banner from the Hugging Face page was
# captured with this file; kept here as a comment so the module stays valid.
import asyncio
from urllib.parse import quote_plus

import gradio as gr
import nest_asyncio
from crawl4ai import AsyncWebCrawler
from gradio_client import Client
from playwright.async_api import async_playwright
# 1. Apply Asyncio Fix
# Patch the already-running event loop so nested asyncio.run()/await calls
# work inside environments that own a loop (Gradio / Jupyter / HF Spaces).
nest_asyncio.apply()

# --- CONFIGURATION ---
# Hugging Face Space id of the GLM-4.5 chat backend used by ask_ai().
AI_CLIENT_URL = "zai-org/GLM-4.5-Space"
async def search_bing_for_links(query, max_results=3):
    """
    Search Bing with Playwright and return the top organic result URLs.

    Bing is used instead of Google to avoid Google's 429 rate limits.

    Args:
        query: Free-text search query. It is URL-encoded before being placed
            in the request, so special characters (&, #, +) are safe.
        max_results: Maximum number of links to return (default 3, matching
            the original hard-coded behavior).

    Returns:
        list[str]: Absolute http(s) result URLs; empty on error or no results.
    """
    print(f"🕵️ Searching Bing for: {query}")
    links = []
    async with async_playwright() as p:
        # Launch flags that make headless Chromium look like a real user,
        # reducing the chance of bot detection.
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            ],
        )
        page = await browser.new_page()
        try:
            # URL-encode the query so it can't corrupt the request URL.
            await page.goto(
                f"https://www.bing.com/search?q={quote_plus(query)}",
                timeout=10000,
            )
            # Organic Bing results live under 'li.b_algo'; wait for at least
            # one anchor to be present before scraping.
            await page.wait_for_selector("li.b_algo h2 a", timeout=5000)
            results = await page.locator("li.b_algo h2 a").all()
            for res in results[:max_results]:
                url = await res.get_attribute("href")
                if url and url.startswith("http"):
                    links.append(url)
        except Exception as e:
            # Best-effort: log and return whatever was collected so far.
            print(f"Bing Search Error: {e}")
        finally:
            await browser.close()
    return links
async def get_web_research(query, max_chars_per_page=2500):
    """
    Research a query on the open web.

    Pipeline:
        1. Get result URLs from Bing via search_bing_for_links().
        2. Crawl each URL with Crawl4AI and collect its markdown text.

    Args:
        query: Topic to research.
        max_chars_per_page: Per-page cap on extracted markdown so the combined
            AI prompt stays manageable (default 2500, the original limit).

    Returns:
        str: Concatenated "=== SOURCE: <url> ===" sections, or a
        human-readable failure message when nothing could be retrieved.
    """
    # Step 1: Get URLs.
    urls = await search_bing_for_links(query)
    if not urls:
        return "No links found on Bing. The server might be blocked or the query returned no results."
    print(f"🔗 Found URLs: {urls}")

    # Step 2: Crawl URLs, skipping any page that fails.
    combined_content = ""
    async with AsyncWebCrawler(verbose=True) as crawler:
        for url in urls:
            try:
                print(f"🕷️ Crawling: {url}")
                result = await crawler.arun(url=url)
                if result.markdown:
                    combined_content += f"\n\n=== SOURCE: {url} ===\n"
                    # Cap per-page text to keep the AI input manageable.
                    combined_content += result.markdown[:max_chars_per_page]
            except Exception as e:
                # Protected/unreachable sites: log and move on.
                print(f"Failed to crawl {url}: {e}")
                continue
    if not combined_content:
        return "Found Bing links, but could not extract text (sites might be protected)."
    return combined_content
def clean_api_response(response):
    """
    Normalize the raw return value of the GLM-4.5 Space into plain text.

    The Space may return a list of chat messages, a Gradio output tuple, or a
    bare value; this extracts the final assistant text for each shape.

    Args:
        response: Raw value returned by gradio_client's predict().

    Returns:
        str: The extracted answer text, or a diagnostic string on failure.
    """
    try:
        # Case 1: list of messages (standard chat interface) ->
        # take the last message's 'content'.
        if isinstance(response, list):
            if not response:
                # Empty history: nothing to extract (previously this raised
                # IndexError and returned a misleading "Error parsing" string).
                return str(response)
            last_msg = response[-1]
            if isinstance(last_msg, dict) and 'content' in last_msg:
                return last_msg['content']
            return str(last_msg)
        # Case 2: Gradio tuple return -> last element is the answer.
        if isinstance(response, tuple):
            if not response:
                return str(response)
            return str(response[-1])
        # Case 3: already a bare value.
        return str(response)
    except Exception as e:
        return f"Error parsing AI response: {str(e)} | Raw: {str(response)}"
async def ask_ai(user_query):
    """
    Gradio event handler: research user_query on the web, then answer it.

    Implemented as an async generator so interim status messages stream to
    the UI before the final answer replaces them.

    Args:
        user_query: Topic typed by the user; falsy input yields a prompt to
            type something and stops.

    Yields:
        str: Status updates, then the final answer (or an error message).
    """
    if not user_query:
        yield "Please type a topic."
        return

    yield "🔍 Searching Bing & Crawling sites..."
    # 1. Perform research (Bing search + crawl pipeline).
    context_data = await get_web_research(user_query)

    # 2. Construct the prompt: instructions + scraped context.
    system_prompt = (
        "You are a helpful research assistant. "
        "I have performed a Bing Search and scraped the top results. "
        "The content is provided below. "
        "Answer the user's question based strictly on this data. "
        "Cite the sources (URLs) used."
    )
    full_message = f"User Query: {user_query}\n\nWeb Search Data:\n{context_data}"

    yield "🧠 Analyzing content and generating answer..."
    # 3. Call the remote GLM-4.5 Space.
    try:
        client = Client(AI_CLIENT_URL)
        raw_result = client.predict(
            msg=full_message,
            sys_prompt=system_prompt,
            thinking_enabled=True,
            temperature=0.6,
            api_name="/chat_wrapper",
        )
        # 4. Normalize the Space's raw return value into plain text.
        yield clean_api_response(raw_result)
    except Exception as e:
        yield f"AI Connection Error: {str(e)}"
# --- UI SETUP ---
# Center the app and cap its width.
custom_css = """
#component-0 {max-width: 900px; margin: auto;}
"""

with gr.Blocks(css=custom_css, title="AI Bing Agent") as demo:
    gr.Markdown("# 🕷️ AI Research Agent (Bing + Crawl4AI)")
    gr.Markdown("Type a topic. The agent searches Bing, reads the pages, and answers.")
    with gr.Row():
        query_input = gr.Textbox(
            label="Research Topic",
            placeholder="e.g. Release date of Avengers Doomsday",
            scale=4,
        )
        submit_btn = gr.Button("Deep Search", variant="primary", scale=1)
    output_display = gr.Markdown(label="Research Result")

    # Event triggers: both clicking the button and pressing Enter in the
    # textbox run the (streaming) agent.
    submit_btn.click(fn=ask_ai, inputs=query_input, outputs=output_display)
    query_input.submit(fn=ask_ai, inputs=query_input, outputs=output_display)

if __name__ == "__main__":
    # queue() is required for generator (streaming) handlers; bind to all
    # interfaces on 7860, the standard Hugging Face Spaces port.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)