File size: 5,797 Bytes
75ab5c6
 
ee06110
3f285ae
 
ee06110
 
3f285ae
ee06110
75ab5c6
5746e2c
 
75ab5c6
3f285ae
75ab5c6
3f285ae
 
75ab5c6
3f285ae
 
75ab5c6
3f285ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee06110
3f285ae
 
 
 
 
 
 
 
ee06110
3f285ae
ee06110
 
 
 
9f5331c
ee06110
 
 
 
 
5746e2c
8b44d8f
 
3f285ae
9f5331c
ee06110
 
 
 
 
3f285ae
ee06110
 
75ab5c6
9f5331c
8b44d8f
3f285ae
8b44d8f
3f285ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f5331c
 
75ab5c6
8b44d8f
d0a09ef
 
3f285ae
75ab5c6
8b44d8f
3f285ae
75ab5c6
8b44d8f
5746e2c
9f5331c
3f285ae
9f5331c
ee06110
9f5331c
75ab5c6
 
ee06110
5746e2c
ee06110
5746e2c
9f5331c
75ab5c6
5746e2c
9f5331c
75ab5c6
5746e2c
75ab5c6
ee06110
75ab5c6
 
9f5331c
3f285ae
9f5331c
 
5746e2c
75ab5c6
8b44d8f
 
 
 
 
 
 
3f285ae
 
 
8b44d8f
 
9f5331c
8b44d8f
 
 
 
9f5331c
8b44d8f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import asyncio
from urllib.parse import quote_plus

import gradio as gr
import nest_asyncio
from crawl4ai import AsyncWebCrawler
from gradio_client import Client
from playwright.async_api import async_playwright

# 1. Apply Asyncio Fix
# nest_asyncio patches the current event loop to allow nested `asyncio.run`
# calls — needed because Playwright/Crawl4AI coroutines run inside an
# environment (Gradio) that may already own a running loop.
nest_asyncio.apply()

# --- CONFIGURATION ---
# Hugging Face Space ID of the hosted GLM-4.5 chat backend queried by ask_ai.
AI_CLIENT_URL = "zai-org/GLM-4.5-Space"

async def search_bing_for_links(query):
    """
    Search Bing for *query* and return up to the top 3 organic result URLs.

    Bing is used instead of Google to avoid Google's 429 rate limits.

    Args:
        query: Free-text search query; URL-encoded before being sent.

    Returns:
        list[str]: Up to 3 absolute http(s) URLs; empty list on any failure.
    """
    print(f"🕵️ Searching Bing for: {query}")
    links = []

    async with async_playwright() as p:
        # Launch browser with arguments to look like a real user
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ]
        )
        page = await browser.new_page()

        try:
            # BUG FIX: URL-encode the query — raw interpolation broke or
            # truncated searches containing spaces, '&', '#', or non-ASCII.
            await page.goto(f"https://www.bing.com/search?q={quote_plus(query)}", timeout=10000)
            await page.wait_for_selector("li.b_algo h2 a", timeout=5000)

            # Organic Bing results sit under 'li.b_algo'; grab the title anchors.
            results = await page.locator("li.b_algo h2 a").all()

            for res in results[:3]:  # Get top 3
                url = await res.get_attribute("href")
                # Keep only absolute http(s) links (skips javascript:/relative hrefs).
                if url and url.startswith("http"):
                    links.append(url)

        except Exception as e:
            # Best-effort: log and fall through, returning whatever was collected.
            print(f"Bing Search Error: {e}")
        finally:
            await browser.close()

    return links

async def get_web_research(query):
    """
    Run the research pipeline for *query*.

    1. Fetch candidate URLs from Bing (via ``search_bing_for_links``).
    2. Crawl each URL with Crawl4AI and collect its markdown.

    Returns:
        str: Concatenated page excerpts, each tagged with its source URL,
        or a human-readable failure message when nothing could be gathered.
    """
    # Step 1: discover source URLs.
    urls = await search_bing_for_links(query)

    if not urls:
        return "No links found on Bing. The server might be blocked or the query returned no results."

    print(f"🔗 Found URLs: {urls}")

    # Step 2: crawl each URL, accumulating per-page sections; joined once at the end.
    sections = []
    async with AsyncWebCrawler(verbose=True) as crawler:
        for link in urls:
            try:
                print(f"🕷️ Crawling: {link}")
                crawl_result = await crawler.arun(url=link)

                if crawl_result.markdown:
                    # Cap each page at 2500 chars to keep the AI input manageable.
                    sections.append(f"\n\n=== SOURCE: {link} ===\n" + crawl_result.markdown[:2500])
            except Exception as e:
                print(f"Failed to crawl {link}: {e}")
                continue

    if not sections:
        return "Found Bing links, but could not extract text (sites might be protected)."

    return "".join(sections)

def clean_api_response(response):
    """
    Extract the assistant's text from a GLM-4.5 Space prediction result.

    The Space may return a list of chat messages, a tuple of gradio outputs,
    or a plain value; all are normalized to a single string.

    Args:
        response: Raw value returned by ``Client.predict``.

    Returns:
        str: The extracted answer text, or an error description on failure.
    """
    try:
        # Case 1: Response is a list of messages (standard Chat interface).
        if isinstance(response, list):
            # BUG FIX: an empty list previously raised IndexError on [-1] and
            # returned a misleading "Error parsing" string; stringify instead.
            if not response:
                return str(response)
            last_msg = response[-1]
            if isinstance(last_msg, dict) and 'content' in last_msg:
                return last_msg['content']
            return str(last_msg)

        # Case 2: Response is a tuple (gradio return) — use the last element.
        if isinstance(response, tuple):
            if not response:
                return str(response)
            return str(response[-1])

        # Fallback: stringify anything else (str, None, numbers, dicts, ...).
        return str(response)
    except Exception as e:
        return f"Error parsing AI response: {str(e)} | Raw: {str(response)}"

async def ask_ai(user_query):
    """
    Gradio streaming handler: research *user_query* on the web, then ask the AI.

    Yields intermediate status strings followed by the final answer (or an
    error message), so the UI updates progressively while work is in flight.
    """
    if not user_query:
        yield "Please type a topic."
        return

    yield "🔍 Searching Bing & Crawling sites..."

    # 1. Gather scraped web context for the query.
    context_data = await get_web_research(user_query)

    # 2. Build the instruction + payload sent to the model.
    system_prompt = (
        "You are a helpful research assistant. "
        "I have performed a Bing Search and scraped the top results. "
        "The content is provided below. "
        "Answer the user's question based strictly on this data. "
        "Cite the sources (URLs) used."
    )
    full_message = f"User Query: {user_query}\n\nWeb Search Data:\n{context_data}"

    yield "🧠 Analyzing content and generating answer..."

    # 3. Call the hosted Space, then 4. normalize its raw reply to text.
    try:
        ai_client = Client(AI_CLIENT_URL)
        raw_result = ai_client.predict(
            msg=full_message,
            sys_prompt=system_prompt,
            thinking_enabled=True,
            temperature=0.6,
            api_name="/chat_wrapper"
        )
        yield clean_api_response(raw_result)
    except Exception as e:
        yield f"AI Connection Error: {str(e)}"

# --- UI SETUP ---
# Centers the main column and caps its width at 900px.
custom_css = """
#component-0 {max-width: 900px; margin: auto;}
"""

with gr.Blocks(css=custom_css, title="AI Bing Agent") as demo:
    gr.Markdown("# 🕷️ AI Research Agent (Bing + Crawl4AI)")
    gr.Markdown("Type a topic. The agent searches Bing, reads the pages, and answers.")
    
    with gr.Row():
        query_input = gr.Textbox(label="Research Topic", placeholder="e.g. Release date of Avengers Doomsday", scale=4)
        submit_btn = gr.Button("Deep Search", variant="primary", scale=1)
    
    output_display = gr.Markdown(label="Research Result")
    
    # Event triggers
    # Both the button click and pressing Enter in the textbox run ask_ai;
    # its yielded status strings stream into the markdown output.
    submit_btn.click(fn=ask_ai, inputs=query_input, outputs=output_display)
    query_input.submit(fn=ask_ai, inputs=query_input, outputs=output_display)

if __name__ == "__main__":
    # queue() is required for the generator (streaming) handler above.
    # 0.0.0.0:7860 is the standard Hugging Face Spaces binding;
    # ssr_mode=False disables server-side rendering of the UI.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)