# ubra / app.py
# mset's picture
# Create app.py
# 09ba347 verified
import html
import json
import os
import re
from datetime import datetime
from urllib.parse import quote

import gradio as gr
import requests
import spacy
from bs4 import BeautifulSoup
from transformers import pipeline
# Initialise the NLP and summarization models once, at import time.
# NOTE(review): "en_core_web_sm" is spaCy's English pipeline, while the intent
# keywords and example prompts in this file are Italian — confirm this is intended.
nlp = spacy.load("en_core_web_sm")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
class ConversationalUBRA:
    """Research assistant that queries web sources and condenses the results.

    Attributes:
        conversation_history: List initialised empty; no method of this class
            currently writes to it.
        sources: Maps a source name (``'duckduckgo'``, ``'wikipedia'``,
            ``'newsapi'``, ``'google_scholar'``) to a flag telling whether
            ``collect_information`` should query it.

    Optional attributes, set by the caller after construction:
        newsapi_key: API key used by ``search_newsapi``.
        scholar_key / scholar_cx: Key and engine id used by
            ``search_google_scholar``.
    """

    # Keyword triggers per intent, listed in priority order: the first intent
    # with a matching keyword wins.
    _INTENT_KEYWORDS = (
        ('comparison', ('vs', 'comparare', 'confrontare')),
        ('definition', ("cos'è", 'significa', 'definizione')),
        ('how_to', ('come', 'funziona', 'procedura')),
        ('opinion', ('opinione', 'credi', 'pensiero')),
    )
    _MAX_RESULTS = 3        # results kept per source
    _REQUEST_TIMEOUT = 10   # seconds, applied to every HTTP call
    _NO_RESULTS_MESSAGE = "Non sono riuscito a trovare informazioni rilevanti."

    def __init__(self):
        self.conversation_history = []
        self.sources = {
            'duckduckgo': True,
            'wikipedia': True,
            'newsapi': False,
            'google_scholar': False,
        }

    @classmethod
    def _match_keyword_intent(cls, query):
        """Return the first keyword-triggered intent found in *query*, or None.

        Matching is case-insensitive and restricted to whole words, so that
        "Come ..." triggers ``how_to`` while "canvas" does not trigger
        ``comparison`` through its embedded "vs".
        """
        # Treat the typographic apostrophe like the ASCII one ("cos’è").
        lowered = query.lower().replace('\u2019', "'")
        for intent, keywords in cls._INTENT_KEYWORDS:
            for keyword in keywords:
                pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
                if re.search(pattern, lowered):
                    return intent
        return None

    @staticmethod
    def _clean_snippet(snippet):
        """Strip HTML tags and decode HTML entities in a search snippet."""
        return html.unescape(re.sub(r'<[^>]+>', '', snippet or ''))

    @staticmethod
    def _node_text(node):
        """Return the stripped text of a parsed HTML node, or '' when absent."""
        return node.get_text(strip=True) if node is not None else ''

    def analyze_intent(self, query):
        """Classify the intent of *query* and extract its keywords.

        Specific intents (comparison, definition, how_to, opinion) take
        precedence over the generic ``information_request``. Previously the
        generic intent won whenever the query contained a noun, which made the
        specific branches practically unreachable.

        Returns:
            dict with ``'primary'`` (intent name, ``'general'`` when nothing
            matches) and ``'keywords'`` (lower-cased lemmas of the tokens that
            are neither stop words, punctuation nor whitespace).
        """
        doc = nlp(query)
        primary = self._match_keyword_intent(query)
        if primary is None:
            has_subject = any(token.pos_ in ('NOUN', 'PROPN') for token in doc)
            primary = 'information_request' if has_subject else 'general'
        keywords = [
            token.lemma_.lower()
            for token in doc
            if not (token.is_stop or token.is_punct or token.is_space)
        ]
        return {'primary': primary, 'keywords': keywords}

    def collect_information(self, query, intent):
        """Gather result strings from every enabled source.

        Args:
            query: Search text forwarded to each source.
            intent: Result of ``analyze_intent``; accepted for interface
                compatibility and not used here.

        Returns:
            Flat list of formatted result (or error) strings, in the order
            DuckDuckGo, Wikipedia, NewsAPI, Google Scholar.
        """
        searchers = (
            ('duckduckgo', self.search_duckduckgo),
            ('wikipedia', self.search_wikipedia),
            ('newsapi', self.search_newsapi),
            ('google_scholar', self.search_google_scholar),
        )
        data_sources = []
        for name, search in searchers:
            if self.sources.get(name):
                data_sources.extend(search(query))
        return data_sources

    def search_duckduckgo(self, query):
        """Scrape up to three results from DuckDuckGo's HTML endpoint.

        Returns:
            List of formatted result strings, or a single-item list holding an
            error message when the request or the parsing fails.
        """
        try:
            response = requests.get(
                "https://duckduckgo.com/html",
                params={'q': query},  # lets requests URL-encode the query
                headers={'User-Agent': 'Mozilla/5.0'},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            results = []
            for item in soup.select('.result__body'):
                title = self._node_text(item.select_one('.result__title'))
                if not title:
                    # Entry without a title node (e.g. an ad block): skip it
                    # instead of failing the whole search.
                    continue
                snippet = self._node_text(item.select_one('.result__snippet'))
                link = self._node_text(item.select_one('.result__url'))
                results.append(f"🌐 DuckDuckGo:\n{title}\n{snippet}\nLink: {link}\n")
                if len(results) == self._MAX_RESULTS:
                    break
            return results
        except Exception as e:
            # Deliberate best effort: one failing source must not abort the rest.
            return [f"⚠️ Errore DuckDuckGo: {str(e)}"]

    def search_wikipedia(self, query):
        """Query the Italian Wikipedia search API for up to three pages.

        Returns:
            List of formatted result strings, or a single-item list holding an
            error message when the request fails.
        """
        try:
            response = requests.get(
                "https://it.wikipedia.org/w/api.php",
                params={
                    'action': 'query',
                    'list': 'search',
                    'srsearch': query,
                    'format': 'json',
                    'srlimit': self._MAX_RESULTS,
                },
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            results = []
            hits = data.get('query', {}).get('search', [])
            for item in hits[:self._MAX_RESULTS]:
                title = item['title']
                snippet = self._clean_snippet(item.get('snippet'))
                # Percent-encode the title so characters such as '?' or '#'
                # do not break the link.
                page_url = f"https://it.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"
                results.append(f"📚 Wikipedia:\n{title}\n{snippet}\nLink: {page_url}\n")
            return results
        except Exception as e:
            # Deliberate best effort: one failing source must not abort the rest.
            return [f"⚠️ Errore Wikipedia: {str(e)}"]

    def search_newsapi(self, query):
        """Query NewsAPI's ``/v2/everything`` endpoint (needs ``newsapi_key``).

        Returns:
            List of formatted result strings, a single-item list with a
            "not configured" notice when no key is set, or a single-item list
            holding an error message when the request fails.
        """
        try:
            api_key = getattr(self, 'newsapi_key', None)
            if not api_key:
                return ["⚠️ NewsAPI non configurato. Imposta la chiave API."]
            response = requests.get(
                "https://newsapi.org/v2/everything",
                params={'q': query, 'apiKey': api_key},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            results = []
            for article in data.get('articles', [])[:self._MAX_RESULTS]:
                # Fields may be null in the payload; show '' rather than "None".
                title = article.get('title') or ''
                description = article.get('description') or ''
                link = article.get('url') or ''
                results.append(f"📰 NewsAPI:\n{title}\n{description}\nLink: {link}\n")
            return results
        except Exception as e:
            # Deliberate best effort: one failing source must not abort the rest.
            return [f"⚠️ Errore NewsAPI: {str(e)}"]

    def search_google_scholar(self, query):
        """Query the Google Custom Search JSON API with the configured engine.

        Needs both ``scholar_key`` and ``scholar_cx`` to be set on the instance.

        Returns:
            List of formatted result strings, a single-item list with a
            "not configured" notice when credentials are missing, or a
            single-item list holding an error message when the request fails.
        """
        try:
            api_key = getattr(self, 'scholar_key', None)
            engine_id = getattr(self, 'scholar_cx', None)
            if not api_key or not engine_id:
                return ["⚠️ Google Scholar non configurato. Imposta cx e chiave API."]
            response = requests.get(
                "https://www.googleapis.com/customsearch/v1",
                params={'key': api_key, 'cx': engine_id, 'q': query},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            results = []
            for item in data.get('items', [])[:self._MAX_RESULTS]:
                title = item.get('title') or ''
                snippet = item.get('snippet') or ''
                link = item.get('link') or ''
                results.append(f"📚 Google Scholar:\n{title}\n{snippet}\nLink: {link}\n")
            return results
        except Exception as e:
            # Deliberate best effort: one failing source must not abort the rest.
            return [f"⚠️ Errore Google Scholar: {str(e)}"]

    def generate_response(self, query):
        """Answer *query*: detect its intent, collect data, post-process it.

        An empty or whitespace-only query returns the "nothing found" message
        without touching the network.
        """
        if not query or not query.strip():
            return self._NO_RESULTS_MESSAGE
        intent = self.analyze_intent(query)
        data = self.collect_information(query, intent)
        if not data:
            return self._NO_RESULTS_MESSAGE
        handlers = {
            'comparison': self.process_comparison,
            'how_to': self.process_how_to,
            'opinion': self.process_opinion,
        }
        # Every other intent (including 'definition') gets a plain summary.
        handler = handlers.get(intent['primary'], self.summarize_data)
        return handler(data)

    def process_comparison(self, data):
        """Keep up to three items that look like comparisons ('vs', 'confronto')."""
        comparisons = [
            item for item in data
            if 'vs' in item.lower() or 'confronto' in item.lower()
        ]
        if not comparisons:
            return "Non ho trovato informazioni dirette per confrontare questi elementi."
        return "Ecco i principali punti di confronto:\n\n" + "\n\n".join(comparisons[:3])

    def process_how_to(self, data):
        """Keep up to three items that mention procedural step words."""
        step_words = ('passo', 'step', 'procedura')
        procedures = [
            item for item in data
            if any(word in item.lower() for word in step_words)
        ]
        if not procedures:
            return "Non ho trovato istruzioni dettagliate. Prova a cercare con parole chiave come 'guida', 'tutorial' o 'istruzioni'."
        return "Ecco i passaggi principali:\n\n" + "\n\n".join(procedures[:3])

    def process_opinion(self, data):
        """Keep up to three items that mention opinion-related words."""
        opinion_words = ('opinione', 'pensiero', 'considerazione')
        opinions = [
            item for item in data
            if any(word in item.lower() for word in opinion_words)
        ]
        if not opinions:
            return "Non ho trovato opinioni esplicite. Posso fornirti informazioni oggettive sulle fonti consultate."
        return "Ecco alcune opinioni rilevate:\n\n" + "\n\n".join(opinions[:3])

    def summarize_data(self, data):
        """Join the collected items and summarize them when they are long.

        Text of 300 characters or fewer is returned verbatim; longer text is
        passed to the module-level ``summarizer`` pipeline.
        """
        if not data:
            return "Non sono riuscito a trovare informazioni rilevanti per la tua query."
        combined_text = "\n\n---\n\n".join(data)
        if len(combined_text) <= 300:
            return combined_text
        # truncation=True clips over-long input to the model's maximum length
        # instead of letting the pipeline fail on it.
        summary = summarizer(
            combined_text,
            max_length=500,
            min_length=100,
            truncation=True,
        )
        return summary[0]['summary_text']
# Gradio interface
def create_app():
    """Build the Gradio chat UI backed by a single ``ConversationalUBRA``.

    Returns:
        The configured ``gr.ChatInterface``; call ``.launch()`` on it to serve.
    """
    app = ConversationalUBRA()

    def respond(message, history):
        """Return the assistant's reply to *message*.

        ``gr.ChatInterface`` manages the textbox and the chat history itself,
        so the callback must return only the reply, not the
        ``("", updated_history)`` pair used with hand-wired ``gr.Blocks``
        events.
        """
        return app.generate_response(message)

    iface = gr.ChatInterface(
        fn=respond,
        examples=[
            "Spiega i benefici dell'intelligenza artificiale",
            "Confronta le energie rinnovabili vs fossili",
            "Come preparare un piano di business?",
            "Definisci la sostenibilità aziendale",
        ],
        title="UBRA - Assistente Conversazionale Intelligente",
        description="Un AI che ricerca e sintetizza informazioni da fonti affidabili. Chiedi anything!",
    )
    return iface
if __name__ == "__main__":
    # Script entry point: build the chat interface, then start serving it.
    interface = create_app()
    interface.launch()