# NOTE: Hugging Face Spaces page metadata (scrape residue) — Space status
# "Sleeping", file size 5,797 bytes. Kept as a comment so the file parses.
import asyncio

import gradio as gr
import nest_asyncio
from crawl4ai import AsyncWebCrawler
from gradio_client import Client
from playwright.async_api import async_playwright

# 1. Apply Asyncio Fix — Gradio and Playwright both drive an event loop,
# so nested loops must be permitted.
nest_asyncio.apply()

# --- CONFIGURATION ---
# Hugging Face Space hosting the GLM-4.5 chat endpoint used for answers.
AI_CLIENT_URL = "zai-org/GLM-4.5-Space"
async def search_bing_for_links(query):
    """
    Search Bing for *query* and return up to the top 3 organic result URLs.

    Uses Playwright with a real-browser user agent to reduce bot
    detection; Bing is used instead of Google to avoid 429 rate limits.

    Args:
        query: Free-text search string; URL-encoded before use.

    Returns:
        list[str]: At most 3 absolute http(s) URLs; empty list on any
        navigation/selector failure (errors are printed, not raised).
    """
    # Local import keeps this fix self-contained.
    from urllib.parse import quote_plus

    print(f"🕵️ Searching Bing for: {query}")
    links = []
    async with async_playwright() as p:
        # Launch browser with arguments to look like a real user
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ]
        )
        page = await browser.new_page()
        try:
            # BUGFIX: quote_plus() — a raw query containing spaces, '&'
            # or '#' previously produced a malformed search URL.
            await page.goto(f"https://www.bing.com/search?q={quote_plus(query)}", timeout=10000)
            await page.wait_for_selector("li.b_algo h2 a", timeout=5000)
            # Organic Bing results live under 'li.b_algo'; take the top 3.
            results = await page.locator("li.b_algo h2 a").all()
            for res in results[:3]:
                url = await res.get_attribute("href")
                if url and url.startswith("http"):
                    links.append(url)
        except Exception as e:
            print(f"Bing Search Error: {e}")
        finally:
            # Always release the browser, even after a timeout.
            await browser.close()
    return links
async def get_web_research(query):
    """
    Full research pipeline for *query*.

    1. Ask Bing for candidate URLs (via ``search_bing_for_links``).
    2. Crawl each URL with Crawl4AI and collect its markdown.

    Returns:
        str: Concatenated per-source markdown (capped at 2500 chars per
        page to keep the AI prompt manageable), or a human-readable
        message when search or extraction produced nothing.
    """
    # Step 1: Get URLs
    urls = await search_bing_for_links(query)
    if not urls:
        return "No links found on Bing. The server might be blocked or the query returned no results."
    print(f"🔗 Found URLs: {urls}")

    # Step 2: Crawl URLs, gathering one section of text per source.
    sections = []
    async with AsyncWebCrawler(verbose=True) as crawler:
        for target in urls:
            try:
                print(f"🕷️ Crawling: {target}")
                page = await crawler.arun(url=target)
                if page.markdown:
                    # Limit content to 2500 chars per page.
                    sections.append(f"\n\n=== SOURCE: {target} ===\n" + page.markdown[:2500])
            except Exception as e:
                print(f"Failed to crawl {target}: {e}")
                continue

    if not sections:
        return "Found Bing links, but could not extract text (sites might be protected)."
    return "".join(sections)
def clean_api_response(response):
    """
    Normalize the GLM-4.5 Space's reply into a plain string.

    The Space may hand back a chat-history list, a Gradio output tuple,
    or a bare value; in each case the final answer text is extracted.
    Any parsing failure is reported as an error string rather than raised.
    """
    try:
        # Gradio multi-output tuple: the answer is the last element.
        if isinstance(response, tuple):
            return str(response[-1])
        # Anything that isn't a message list is stringified as-is.
        if not isinstance(response, list):
            return str(response)
        # Chat interface: the answer is the last message's content.
        final = response[-1]
        has_content = isinstance(final, dict) and 'content' in final
        return final['content'] if has_content else str(final)
    except Exception as e:
        return f"Error parsing AI response: {str(e)} | Raw: {str(response)}"
async def ask_ai(user_query):
    """
    Async generator backing the UI: streams progress messages, then the
    final answer produced by the GLM-4.5 Space from freshly crawled data.

    Yields:
        str: Status updates followed by either the cleaned AI answer or
        a connection-error message.
    """
    # Guard clause: nothing to research.
    if not user_query:
        yield "Please type a topic."
        return

    yield "🔍 Searching Bing & Crawling sites..."
    context_data = await get_web_research(user_query)

    # Build the prompt pair: fixed instructions + query-with-context.
    system_prompt = (
        "You are a helpful research assistant. "
        "I have performed a Bing Search and scraped the top results. "
        "The content is provided below. "
        "Answer the user's question based strictly on this data. "
        "Cite the sources (URLs) used."
    )
    full_message = f"User Query: {user_query}\n\nWeb Search Data:\n{context_data}"

    yield "🧠 Analyzing content and generating answer..."
    try:
        client = Client(AI_CLIENT_URL)
        raw_result = client.predict(
            msg=full_message,
            sys_prompt=system_prompt,
            thinking_enabled=True,
            temperature=0.6,
            api_name="/chat_wrapper"
        )
        # Normalize whatever shape the Space returned before showing it.
        yield clean_api_response(raw_result)
    except Exception as e:
        yield f"AI Connection Error: {str(e)}"
# --- UI SETUP ---
custom_css = """
#component-0 {max-width: 900px; margin: auto;}
"""

with gr.Blocks(css=custom_css, title="AI Bing Agent") as demo:
    gr.Markdown("# 🕷️ AI Research Agent (Bing + Crawl4AI)")
    gr.Markdown("Type a topic. The agent searches Bing, reads the pages, and answers.")
    with gr.Row():
        query_input = gr.Textbox(label="Research Topic", placeholder="e.g. Release date of Avengers Doomsday", scale=4)
        submit_btn = gr.Button("Deep Search", variant="primary", scale=1)
    output_display = gr.Markdown(label="Research Result")

    # Event triggers: both the button and pressing Enter run the agent.
    submit_btn.click(fn=ask_ai, inputs=query_input, outputs=output_display)
    query_input.submit(fn=ask_ai, inputs=query_input, outputs=output_display)

if __name__ == "__main__":
    # queue() is required so the ask_ai generator can stream updates.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)