# Puppeter / app.py — Hugging Face Space by mdnazib963 (commit 3f285ae, verified)
import gradio as gr
from gradio_client import Client
import nest_asyncio
import asyncio
from playwright.async_api import async_playwright
from crawl4ai import AsyncWebCrawler
# 1. Apply Asyncio Fix: permit re-entrant/nested event loops. Gradio and
#    Playwright both drive asyncio, so nested loop usage must be allowed.
nest_asyncio.apply()
# --- CONFIGURATION ---
# Hugging Face Space ID of the GLM-4.5 chat backend used to answer queries.
AI_CLIENT_URL = "zai-org/GLM-4.5-Space"
async def search_bing_for_links(query):
    """
    Search Bing for *query* with headless Chromium and return up to the
    top 3 organic-result URLs.

    Bing is used instead of Google to avoid Google's 429 rate limits.

    Args:
        query: Free-text search string; URL-encoded before the request.

    Returns:
        list[str]: Up to 3 absolute http(s) URLs; empty list on failure.
    """
    from urllib.parse import quote_plus  # stdlib; encode the query safely

    print(f"πŸ•΅οΈ Searching Bing for: {query}")
    links = []
    async with async_playwright() as p:
        # Launch flags make the browser look like a real user; Bing may
        # serve a captcha/empty page to obvious automation.
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ]
        )
        page = await browser.new_page()
        try:
            # BUG FIX: the raw query was interpolated unescaped, so any
            # query containing '&', '#', '+', '%' corrupted the URL.
            await page.goto(f"https://www.bing.com/search?q={quote_plus(query)}", timeout=10000)
            await page.wait_for_selector("li.b_algo h2 a", timeout=5000)
            # Organic results live under 'li.b_algo'; grab the title anchors.
            results = await page.locator("li.b_algo h2 a").all()
            for res in results[:3]:  # Get top 3
                url = await res.get_attribute("href")
                if url and url.startswith("http"):
                    links.append(url)
        except Exception as e:
            # Best-effort: log and fall through with whatever was collected.
            print(f"Bing Search Error: {e}")
        finally:
            await browser.close()
    return links
async def get_web_research(query):
    """
    Research *query* on the web in two steps:

    1. Fetch the top result URLs from Bing.
    2. Crawl each URL with Crawl4AI and collect its markdown text.

    Returns the combined page text, or a human-readable message when no
    links were found or no text could be extracted.
    """
    # Step 1: Get URLs
    found_urls = await search_bing_for_links(query)
    if not found_urls:
        return "No links found on Bing. The server might be blocked or the query returned no results."
    print(f"πŸ”— Found URLs: {found_urls}")

    # Step 2: Crawl URLs, gathering per-page sections to join at the end.
    sections = []
    async with AsyncWebCrawler(verbose=True) as crawler:
        for link in found_urls:
            try:
                print(f"πŸ•·οΈ Crawling: {link}")
                crawl_result = await crawler.arun(url=link)
                if crawl_result.markdown:
                    # Cap each page at 2500 chars to keep the AI input manageable.
                    sections.append(f"\n\n=== SOURCE: {link} ===\n" + crawl_result.markdown[:2500])
            except Exception as exc:
                print(f"Failed to crawl {link}: {exc}")

    if not sections:
        return "Found Bing links, but could not extract text (sites might be protected)."
    return "".join(sections)
def clean_api_response(response):
    """
    Extract the assistant's answer text from a GLM-4.5 Space response.

    The Space may return a list of chat messages, a tuple of gradio
    outputs, or a plain value; in each case the textual content of the
    final element is returned.

    Args:
        response: Raw value returned by gradio_client's ``predict``.

    Returns:
        str: The extracted answer text, or a diagnostic string if
        parsing raises unexpectedly.
    """
    try:
        # Case 1: list of chat messages (standard Chat interface) ->
        # return the content of the last message.
        if isinstance(response, list):
            # BUG FIX: an empty list used to raise IndexError on [-1],
            # producing a misleading "Error parsing" string.
            if not response:
                return str(response)
            last_msg = response[-1]
            if isinstance(last_msg, dict) and 'content' in last_msg:
                return last_msg['content']
            return str(last_msg)
        # Case 2: tuple of gradio outputs -> stringify the last one.
        if isinstance(response, tuple):
            if not response:
                return str(response)
            return str(response[-1])
        # Case 3: anything else -> stringify as-is.
        return str(response)
    except Exception as e:
        return f"Error parsing AI response: {str(e)} | Raw: {str(response)}"
async def ask_ai(user_query):
    """
    Gradio streaming handler: research *user_query* on the web, then ask
    the GLM-4.5 Space to answer using only the scraped content.

    Yields status strings while working; the final yielded value is the
    answer (or an error message).
    """
    if not user_query:
        yield "Please type a topic."
        return

    yield "πŸ” Searching Bing & Crawling sites..."
    # 1. Perform Research
    research = await get_web_research(user_query)

    # 2. Construct Prompt
    system_text = (
        "You are a helpful research assistant. "
        "I have performed a Bing Search and scraped the top results. "
        "The content is provided below. "
        "Answer the user's question based strictly on this data. "
        "Cite the sources (URLs) used."
    )
    payload = f"User Query: {user_query}\n\nWeb Search Data:\n{research}"

    yield "🧠 Analyzing content and generating answer..."
    # 3. Call AI Client; a fresh client is created per request.
    try:
        ai = Client(AI_CLIENT_URL)
        raw = ai.predict(
            msg=payload,
            sys_prompt=system_text,
            thinking_enabled=True,
            temperature=0.6,
            api_name="/chat_wrapper"
        )
        # 4. Clean response and emit the final answer.
        yield clean_api_response(raw)
    except Exception as e:
        yield f"AI Connection Error: {str(e)}"
# --- UI SETUP ---
# Constrain the root Blocks container so the app stays centered/readable.
custom_css = """
#component-0 {max-width: 900px; margin: auto;}
"""
with gr.Blocks(css=custom_css, title="AI Bing Agent") as demo:
    gr.Markdown("# πŸ•·οΈ AI Research Agent (Bing + Crawl4AI)")
    gr.Markdown("Type a topic. The agent searches Bing, reads the pages, and answers.")
    with gr.Row():
        query_input = gr.Textbox(label="Research Topic", placeholder="e.g. Release date of Avengers Doomsday", scale=4)
        submit_btn = gr.Button("Deep Search", variant="primary", scale=1)
    output_display = gr.Markdown(label="Research Result")
    # Event triggers: both the button click and pressing Enter in the
    # textbox run the same streaming handler.
    submit_btn.click(fn=ask_ai, inputs=query_input, outputs=output_display)
    query_input.submit(fn=ask_ai, inputs=query_input, outputs=output_display)
if __name__ == "__main__":
    # queue() is required for generator (streaming) handlers like ask_ai;
    # ssr_mode=False sidesteps server-side-rendering issues on Spaces.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)