| | import os |
| | import subprocess |
| | import asyncio |
| | import json |
| | import gradio as gr |
| | import nest_asyncio |
| |
|
| | |
def install_playwright():
    """Ensure the Playwright Chromium browser is available, installing it if needed.

    Points ``PLAYWRIGHT_BROWSERS_PATH`` at a fixed directory and, when that
    directory does not exist yet, runs ``playwright install chromium``
    followed by ``playwright install-deps chromium``. Progress is reported
    on stdout.

    The step is best-effort: any failure is printed as a warning instead of
    being raised, so the caller keeps running even if installation fails.
    """
    import sys  # local import: only needed to locate the running interpreter

    browsers_path = '/home/user/pw-browsers'
    # Run Playwright with the interpreter executing this script so the
    # ``playwright`` module is resolved from the same environment; fall back
    # to ``python3`` on PATH when the interpreter path is unavailable.
    python = sys.executable or "python3"

    try:
        os.environ['PLAYWRIGHT_BROWSERS_PATH'] = browsers_path

        if not os.path.exists(browsers_path):
            print("⏳ Installing Chromium browser... Please wait...")
            subprocess.run(
                [python, "-m", "playwright", "install", "chromium"],
                check=True,
            )
            subprocess.run(
                [python, "-m", "playwright", "install-deps", "chromium"],
                check=True,
            )
            print("✅ Browser installed successfully!")
        else:
            print("✅ Browser already exists.")
    except Exception as e:
        # Deliberately broad: installation problems are reported, not fatal.
        print(f"⚠️ Installation warning: {e}")
| |
|
| | |
# Bootstrap the browser before crawl4ai is imported: install_playwright()
# sets PLAYWRIGHT_BROWSERS_PATH and installs Chromium when the browser
# directory is absent.
# NOTE(review): presumably the import below relies on that setup having run
# first — keep this call ahead of it unless confirmed otherwise.
install_playwright()

from crawl4ai import (
    AsyncWebCrawler,
    BrowserConfig,
    CrawlerRunConfig,
    CacheMode,
    LLMConfig,
    LLMExtractionStrategy
)

# Patch asyncio (via nest_asyncio) so event loops may be re-entered.
# NOTE(review): presumably needed because the hosting environment may already
# be running an event loop — confirm.
nest_asyncio.apply()
| |
|
async def extract_with_gemini(url, api_key, prompt):
    """Crawl ``url`` and extract content with a Gemini-backed LLM strategy.

    Args:
        url: Address of the page to crawl.
        api_key: Gemini API key, passed to the LLM configuration as its token.
        prompt: Instruction handed to the LLM extraction strategy.

    Returns:
        A string meant for display:

        * a warning message when the URL or API key is missing (``None``,
          empty, or whitespace-only);
        * the extracted content pretty-printed as JSON (non-ASCII characters
          kept readable) when it parses as JSON;
        * the extracted content unchanged when it is not valid JSON;
        * an error message when the crawl reports failure or raises.
    """
    # Normalise first: None / whitespace-only values count as missing, and
    # stray whitespace (e.g. a trailing newline on a pasted key) is dropped.
    url = (url or "").strip()
    api_key = (api_key or "").strip()
    if not url or not api_key:
        return "⚠️ অনুগ্রহ করে URL এবং Gemini API Key দিন।"

    browser_config = BrowserConfig(
        headless=True,
        extra_args=[
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-setuid-sandbox"
        ]
    )

    llm_config = LLMConfig(
        provider="gemini/gemini-2.5-flash",
        api_token=api_key
    )

    extraction_strategy = LLMExtractionStrategy(
        llm_config=llm_config,
        instruction=prompt,
        verbose=True
    )

    run_config = CrawlerRunConfig(
        extraction_strategy=extraction_strategy,
        cache_mode=CacheMode.BYPASS  # always fetch the page fresh
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url=url, config=run_config)
            if not result.success:
                return f"❌ এরর: {result.error_message}"

            raw = result.extracted_content
            try:
                parsed = json.loads(raw)
            except (TypeError, ValueError):
                # Not JSON (or not a string at all): hand it back untouched.
                # Only parse failures are caught here so that task
                # cancellation and interrupts are not swallowed.
                return raw
            # ensure_ascii=False keeps non-ASCII text human-readable instead
            # of escaping it to \uXXXX sequences.
            return json.dumps(parsed, indent=2, ensure_ascii=False)
    except Exception as e:
        # Boundary handler: surface any crawl failure as a message for the UI.
        return f"❌ রানটাইম এরর: {str(e)}"
| |
|
def gradio_wrapper(url, api_key, prompt):
    """Synchronous entry point that runs ``extract_with_gemini`` to completion.

    A fresh event loop is created for each call, used to run the coroutine,
    and then fully torn down.

    Args:
        url: Address of the page to crawl.
        api_key: Gemini API key.
        prompt: Extraction instruction for the LLM.

    Returns:
        Whatever ``extract_with_gemini`` returns for the given arguments.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(extract_with_gemini(url, api_key, prompt))
    finally:
        try:
            # Finalise any async generators still pending on this loop.
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Unregister the loop before closing it so the calling thread is
            # not left with a closed loop as its current event loop.
            asyncio.set_event_loop(None)
            loop.close()
| |
|
| | |
# Gradio UI: inputs and the trigger button in the left column, the extraction
# result in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Crawl4AI + Gemini AI Extractor")

    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(
                label="Website URL",
                placeholder="https://example.com",
            )
            api_key = gr.Textbox(
                label="Gemini API Key",
                type="password",
            )
            # Label reads "What do you want to extract?"
            instruction = gr.Textbox(
                label="কী বের করতে চান?",
                lines=4,
            )
            # Button caption reads "Start"
            btn = gr.Button(
                "🚀 শুরু করুন",
                variant="primary",
            )

        with gr.Column():
            output_text = gr.Code(
                label="Result",
                language="json",
            )

    # Clicking the button sends the three inputs through the synchronous
    # wrapper and shows its return value in the result pane.
    btn.click(
        fn=gradio_wrapper,
        inputs=[url_input, api_key, instruction],
        outputs=output_text,
    )


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )