Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from duckduckgo_search import DDGS | |
| from transformers import pipeline | |
| from PIL import Image | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import re | |
| import torch | |
| from io import BytesIO | |
# Pipelines
# Each model is loaded once, at import time, and reused for every request.

# Extractive question answering.
# NOTE(review): not referenced by any function visible in this file.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)

# Image captioning; used by explain_image().
caption_pipeline = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

# Abstractive summarisation; used by summarize_url().
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
| # Utils | |
def search_web(query, max_results=3):
    """Search DuckDuckGo and format the hits as Markdown.

    Args:
        query: Search terms passed to ``DDGS.text``.
        max_results: Upper bound on the number of hits requested.

    Returns:
        One Markdown paragraph per hit (bold title, snippet, link), separated
        by blank lines, or ``"No results found."`` when the search yields
        nothing.
    """
    with DDGS() as ddgs:
        # Materialise while the session is still open: some releases of the
        # library hand back a lazy generator instead of a list, and the call
        # may also return None when there are no hits.
        hits = list(ddgs.text(query, max_results=max_results) or [])

    if not hits:
        return "No results found."

    # .get() keeps one malformed hit from raising KeyError and discarding
    # every other result.
    return "\n\n".join(
        f"**{hit.get('title', '')}**\n{hit.get('body', '')}\n{hit.get('href', '')}"
        for hit in hits
    )
def explain_image(img):
    """Return the text of the first caption ``caption_pipeline`` produces for *img*."""
    captions = caption_pipeline(img)
    first_caption = captions[0]
    return first_caption["generated_text"]
def extract_text_from_url(url):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with ``<script>``/``<style>`` content removed and
        whitespace runs collapsed to single spaces, limited to the first
        3000 characters. On any failure (empty URL, network error, HTTP
        error status, parse error) a string starting with
        ``"Failed to extract text:"`` is returned instead of raising.
    """
    if not url or not url.strip():
        return "Failed to extract text: no URL provided"

    # Deliberately broad best-effort boundary: callers expect a string back,
    # never an exception.
    try:
        res = requests.get(url.strip(), timeout=5)
        # Treat 4xx/5xx responses as failures so the text of an error page is
        # not handed back as if it were the requested content.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        # Remove scripts/styles so their source does not leak into the text.
        for tag in soup(["script", "style"]):
            tag.extract()
        text = soup.get_text(separator=' ')
        clean_text = re.sub(r'\s+', ' ', text).strip()
        return clean_text[:3000]  # Limit to 3000 characters
    except Exception as e:
        return f"Failed to extract text: {e}"
def summarize_url(url):
    """Fetch *url* and return a short summary of its text.

    Args:
        url: Address of the page to summarise.

    Returns:
        The summary produced by ``summarizer``; the extractor's
        ``"Failed to extract text: ..."`` message when the page could not be
        fetched; or ``"Not enough text to summarize."`` when the page yields
        100 characters or fewer.
    """
    text = extract_text_from_url(url)

    # extract_text_from_url() reports failures as a string with this prefix.
    # Surface it as-is instead of summarising the error message, which could
    # happen whenever the message was longer than 100 characters.
    if text.startswith("Failed to extract text:"):
        return text

    if len(text) <= 100:
        return "Not enough text to summarize."

    # The character cap keeps the input small; truncation=True additionally
    # guards the model's token limit, since a character can map to more than
    # one token.
    summary = summarizer(text[:1024], truncation=True)[0]['summary_text']
    return summary
| # Main Agent Function | |
def _run_step(step, *args):
    """Run one agent step, turning any exception into a readable message."""
    # Broad catch on purpose: this is the UI boundary, and one failing step
    # (e.g. a rate-limited search) should not discard the other sections.
    try:
        return step(*args)
    except Exception as exc:
        return f"⚠️ This step failed: {exc}"


def ai_agent(input_text, image=None, url=None):
    """Build a Markdown report from a question, an image and/or a URL.

    Args:
        input_text: Question to look up on the web (may be empty or None).
        image: Image to caption, or None.
        url: Page to summarise (may be empty or None).

    Returns:
        The Markdown sections for every supplied input, separated by
        horizontal rules, or a short prompt when nothing was supplied.
    """
    question = (input_text or "").strip()
    link = (url or "").strip()

    # Compare against None explicitly: truthiness is not defined for every
    # image type (a NumPy array, for instance, raises on bool()).
    has_image = image is not None

    if not question and not has_image and not link:
        return "Please provide a question, an image, or a URL."

    results = []

    # Process Image
    if has_image:
        results.append("🖼️ **Image Explanation:**\n" + _run_step(explain_image, image))

    # Process URL
    if link:
        if "youtube.com" in link or "youtu.be" in link:
            results.append("📹 **Video URL detected.** Currently only summaries of page content are available.")
        results.append("🌐 **Webpage Summary:**\n" + _run_step(summarize_url, link))

    # Web search; questions longer than ten words are labelled as full
    # search results, shorter ones as a direct answer.
    if question:
        if len(question.split()) > 10:
            heading = "🔍 **Web Search Results:**\n"
        else:
            heading = "🧠 **Answer:**\n"
        results.append(heading + _run_step(search_web, question))

    return "\n\n---\n\n".join(results)
# Gradio UI
# Layout: a heading, one row holding the three inputs, then the submit button
# and a Markdown area that displays whatever ai_agent() returns.
with gr.Blocks() as demo:
    gr.Markdown("## 🌐🧠 Multi-Modal AI Agent (Web + Image + URL)")

    # NOTE(review): placing all three inputs inside the row is an assumption;
    # confirm against the intended layout.
    with gr.Row():
        input_text = gr.Textbox(label="Ask a Question", lines=2, placeholder="E.g. What are the latest AI trends?")
        image = gr.Image(type="pil", label="Upload an Image (optional)")
        url = gr.Textbox(label="Provide a URL (optional)", placeholder="https://example.com")

    submit = gr.Button("Get Answer")
    output = gr.Markdown()

    # The order of `inputs` must match ai_agent's positional parameters.
    submit.click(fn=ai_agent, inputs=[input_text, image, url], outputs=output)

# Start the server only when run as a script, so importing this module (for
# tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()