Spaces:
Runtime error
Runtime error
import logging
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
# DuckDuckGo Search
from duckduckgo_search import DDGS
from rake_nltk import Rake
# SerpAPI client for various search engines (Google, Bing, etc.)
from serpapi import GoogleSearch
from transformers import pipeline
# 1) Keyword extraction backed by one shared RAKE instance
rake = Rake()


def extract_keywords(text: str) -> list[str]:
    """Return the first five entries of RAKE's ranked phrase list for *text*.

    The module-level ``rake`` instance is re-fed with *text* on every call,
    so the phrases always reflect the most recent input.
    """
    rake.extract_keywords_from_text(text)
    ranked_phrases = rake.get_ranked_phrases()
    top_five = ranked_phrases[:5]
    return top_five
# 2) Search functions
def ddg_search_links(query: str, num: int = 5) -> list[str]:
    """Search DuckDuckGo for *query* and return the ``href`` of each hit.

    Args:
        query: Search terms passed to ``DDGS.text``.
        num: Forwarded as ``max_results``.

    Returns:
        The ``'href'`` value of every result, in the order returned.
    """
    client = DDGS()
    hits = client.text(query, max_results=num)
    urls = []
    for hit in hits:
        urls.append(hit['href'])
    return urls
def google_search_links(query: str, num: int = 5) -> list[str]:
    """Search Google for *query* and return the result URLs as a list.

    Args:
        query: Search terms.
        num: Number of results to request (``num_results``).

    Returns:
        The URLs yielded by ``googlesearch.search``, materialized into a list.
    """
    # Local import kept from the original; presumably so a missing package
    # only affects this engine -- confirm before moving it to module level.
    from googlesearch import search

    # ``num_results`` is the googlesearch-python API; that API has no
    # ``pause`` keyword (it belongs to the older ``google`` package) and
    # passing it raises TypeError. The equivalent delay is ``sleep_interval``.
    return list(search(query, num_results=num, sleep_interval=2))
def serpapi_search_links(query: str, api_key: str, engine: str = 'bing', num: int = 5) -> list[str]:
    """Query SerpAPI and return up to *num* non-sponsored organic result links.

    Args:
        query: Search terms, sent as ``q``.
        api_key: SerpAPI key, sent as ``api_key``.
        engine: Value of the ``engine`` request parameter.
        num: Maximum number of links to return.

    Returns:
        The ``'link'`` of each entry in ``organic_results`` that has no truthy
        ``'sponsored'`` field, cut to the first *num* entries.
    """
    request_params = {"engine": engine, "q": query, "api_key": api_key}
    payload = GoogleSearch(request_params).get_dict()

    organic_links = []
    for entry in payload.get('organic_results', []):
        if entry.get('sponsored'):
            continue
        organic_links.append(entry['link'])
    return organic_links[:num]
# 3) Fetch page text for summarization
def fetch_text(url: str, timeout: float = 3) -> str:
    """Download *url* and return the text of its paragraphs and h1-h3 headings.

    This is deliberately best-effort: any failure yields an empty string so
    that one bad page cannot break the caller.

    Args:
        url: Page to download.
        timeout: Seconds passed to ``requests.get`` (default 3, as before).

    Returns:
        The text of all ``<p>``, ``<h1>``, ``<h2>`` and ``<h3>`` elements
        joined by single spaces, or ``''`` if the page could not be fetched
        or parsed.
    """
    try:
        resp = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures so error pages are not
        # mistaken for article content.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        texts = soup.find_all(['p', 'h1', 'h2', 'h3'])
        return ' '.join(t.get_text() for t in texts)
    except Exception:
        # Best-effort boundary: unlike a bare ``except:``, this lets
        # KeyboardInterrupt/SystemExit propagate, and the failure is logged
        # instead of vanishing silently.
        logging.getLogger(__name__).warning("Could not fetch %s", url, exc_info=True)
        return ''
# 4) Model loader: lightweight HF model
# FLAN-T5 is an encoder-decoder (seq2seq) model. The 'text-generation' task
# only loads causal language models and fails for T5 at startup, so the
# pipeline must use 'text2text-generation'. Its results still expose the
# 'generated_text' key that the rest of this file reads.
# trust_remote_code is dropped: the T5 architecture ships with transformers,
# so there is no reason to allow execution of code from the model repository.
generator = pipeline('text2text-generation', model='google/flan-t5-small')
def model_answer(prompt: str) -> str:
    """Run the module-level ``generator`` on *prompt* and return its text.

    Generation is greedy (``do_sample=False``) with ``max_length=256``; the
    ``'generated_text'`` of the first result is returned.
    """
    outputs = generator(prompt, max_length=256, do_sample=False)
    first_result = outputs[0]
    return first_result['generated_text']
# 5) Detect phrases with which the user forbids a web search
FORBID_PATTERNS = [
    "bitte nicht im internet suchen",
    "keine websuche",
    "mach das ohne web",
    "ohne online",
    "nur dein wissen",
    "nicht googeln",
    "such nicht",
]


def search_forbidden(prompt: str) -> bool:
    """Return True if the lower-cased *prompt* contains any forbid phrase."""
    lowered = prompt.lower()
    for phrase in FORBID_PATTERNS:
        if phrase in lowered:
            return True
    return False
# 6) Check if answer is uncertain
UNCERTAIN_MARKERS = [
    "ich weiß nicht", "nicht in meinen daten", "keine information", "ich bin mir nicht sicher"
]


def is_uncertain(answer: str) -> bool:
    """Return True if *answer* contains one of the uncertainty markers.

    Matching uses ``str.casefold`` on both the answer and the markers.
    Unlike ``lower()``, case folding maps "ß" to "ss", so upper-case or
    ss-spelled variants such as "ICH WEISS NICHT" are recognized too.
    """
    folded_answer = answer.casefold()
    return any(marker.casefold() in folded_answer for marker in UNCERTAIN_MARKERS)
# 7) Core processing logic
def _collect_links(query: str, serpapi_key: str) -> list[str]:
    """Run every available search engine and return de-duplicated links in first-seen order."""
    engines = [
        ('google', lambda: google_search_links(query)),
        ('duckduckgo', lambda: ddg_search_links(query)),
    ]
    if serpapi_key:
        engines.append(('serpapi-bing', lambda: serpapi_search_links(query, serpapi_key, engine='bing')))
        engines.append(('serpapi-google', lambda: serpapi_search_links(query, serpapi_key, engine='google')))

    links = []
    for name, run_search in engines:
        try:
            links += run_search()
        except Exception:
            # A single rate-limited or unreachable engine must not abort the
            # whole request; log it and continue with the remaining engines.
            logging.getLogger(__name__).warning("Search engine %s failed", name, exc_info=True)
    # dict.fromkeys drops duplicates while keeping the first occurrence order.
    return list(dict.fromkeys(links))


def process(prompt: str, web_enabled: bool, serpapi_key: str) -> str:
    """Answer *prompt* with the model, falling back to a web search if allowed.

    The model is always asked first. A web search is only performed when
    *web_enabled* is set, the prompt contains no forbid phrase, and the
    model's answer looks uncertain.

    Args:
        prompt: The user's question.
        web_enabled: Whether the user switched the web search on.
        serpapi_key: Optional SerpAPI key; empty disables the SerpAPI engines.

    Returns:
        The model's answer, the model's answer prefixed with a warning (search
        forbidden and model uncertain), or text generated from the fetched
        pages. If the search yields no usable page text, the model's original
        answer is returned.
    """
    # Every branch needs the model's own answer, so compute it once up front.
    ans = model_answer(prompt)

    # If user forbids search
    if search_forbidden(prompt):
        if is_uncertain(ans):
            return (
                "Ich weiß leider nichts über das Thema aus meinem Training. "
                "Da du Websuche verboten hast, versuche ich es trotzdem, "
                "aber es kann ungenau sein.\n\n" + ans
            )
        return ans

    # Web search disabled, or the model is confident: use the model's answer.
    if not web_enabled or not is_uncertain(ans):
        return ans

    # Model uncertain: perform multi-engine search. Keywords are extracted
    # only here because no other branch needs them; if RAKE finds none, the
    # raw prompt is used instead of sending an empty query.
    query = ' '.join(extract_keywords(prompt)) or prompt
    unique_links = _collect_links(query, (serpapi_key or '').strip())

    # Fetch top 3 pages and keep only those that produced text.
    fetched = (fetch_text(u) for u in unique_links[:3])
    texts = [page for page in fetched if page.strip()]
    if not texts:
        # Nothing usable was found; the model's own answer beats running the
        # generator on an empty string.
        return ans

    combined = '\n'.join(texts)
    # Whole web pages can far exceed the model's input size; truncate the
    # input instead of feeding an unbounded sequence to the model.
    summary = generator(combined, max_length=256, do_sample=False, truncation=True)[0]['generated_text']
    return summary
# 8) Gradio UI
# NOTE(review): the source's indentation was lost; which widgets belong inside
# the Row is reconstructed here -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Intelligente KI mit Multi-Engine-Websuche")
    with gr.Row():
        prompt_input = gr.Textbox(label="Dein Prompt", lines=3)
        web_switch = gr.Checkbox(label="Websuche aktivieren", value=False)
        serp_input = gr.Textbox(label="SerpAPI Key (optional für SerpAPI-Suche)", placeholder="API Key einfügen")
    btn = gr.Button("Antwort generieren")
    output = gr.Textbox(label="Antwort", lines=10)
    # Wire the button to the core logic: three inputs in, one text answer out.
    btn.click(
        fn=process,
        inputs=[prompt_input, web_switch, serp_input],
        outputs=output
    )
    # The former gr.Spacer() call was removed: Gradio has no such component,
    # so it raised AttributeError while building the UI. The horizontal rule
    # below already separates the hint from the controls.
    gr.Markdown("---")
    gr.Markdown("*Hinweis: Suche nur bei aktivierter Websuche und nicht bei verbotenen Phrasen.*")

demo.launch()