# WikiBot / src /streamlit_app.py
# qwerty45-uiop's picture
# Update src/streamlit_app.py
# fd6fd27 verified
import streamlit as st
import requests
import json
from typing import Dict, List, Optional, Tuple
import re
from urllib.parse import quote
import time
from datetime import datetime
import pandas as pd
from collections import Counter
import hashlib
# Page-level Streamlit configuration: browser-tab title and icon, wide layout,
# and the sidebar expanded on first load.
# NOTE(review): this runs at import time, ahead of every other `st.*` call in
# this file — confirm whether the Streamlit version in use requires that order.
st.set_page_config(
    page_title="WikiBot Pro - AI-Powered Multilingual Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Supported Wikipedia editions, keyed by the English language name shown in
# the UI. Each entry carries:
#   code   - the sub-domain used in "https://<code>.wikipedia.org"
#   flag   - emoji shown next to the name in the language picker
#   native - the language's name written in its own script
# Insertion order is the order the options appear in the select boxes.
LANGUAGES = {
    "English": dict(code="en", flag="🇺🇸", native="English"),
    "Telugu": dict(code="te", flag="🇮🇳", native="తెలుగు"),
    "Hindi": dict(code="hi", flag="🇮🇳", native="हिन्दी"),
    "Spanish": dict(code="es", flag="🇪🇸", native="Español"),
    "French": dict(code="fr", flag="🇫🇷", native="Français"),
    "German": dict(code="de", flag="🇩🇪", native="Deutsch"),
    "Italian": dict(code="it", flag="🇮🇹", native="Italiano"),
    "Portuguese": dict(code="pt", flag="🇵🇹", native="Português"),
    "Russian": dict(code="ru", flag="🇷🇺", native="Русский"),
    "Japanese": dict(code="ja", flag="🇯🇵", native="日本語"),
    "Chinese": dict(code="zh", flag="🇨🇳", native="中文"),
    "Arabic": dict(code="ar", flag="🇸🇦", native="العربية"),
    "Korean": dict(code="ko", flag="🇰🇷", native="한국어"),
    "Tamil": dict(code="ta", flag="🇮🇳", native="தமிழ்"),
    "Bengali": dict(code="bn", flag="🇧🇩", native="বাংলা"),
    "Marathi": dict(code="mr", flag="🇮🇳", native="मराठी"),
    "Gujarati": dict(code="gu", flag="🇮🇳", native="ગુજરાતી"),
    "Kannada": dict(code="kn", flag="🇮🇳", native="ಕನ್ನಡ"),
    "Malayalam": dict(code="ml", flag="🇮🇳", native="മലയാളം"),
    "Punjabi": dict(code="pa", flag="🇮🇳", native="ਪੰਜਾਬੀ"),
}
# Colour palettes selectable from the sidebar. Every palette provides the same
# three CSS colour strings: "primary" (accent), "background" and "text".
THEMES = {
    "Default": dict(primary="#1f77b4", background="#ffffff", text="#000000"),
    "Dark": dict(primary="#00d4aa", background="#0e1117", text="#ffffff"),
    "Ocean": dict(primary="#0077be", background="#f0f8ff", text="#003366"),
    "Forest": dict(primary="#228b22", background="#f5fff5", text="#006400"),
    "Sunset": dict(primary="#ff6b35", background="#fff5f0", text="#8b0000"),
}
class WikipediaAPI:
    """Small client for the Wikipedia REST API and the MediaWiki Action API.

    Successful responses for searches, summaries, extracts and categories are
    memoised in ``self.cache``, a plain dict keyed by an MD5 digest of the
    call arguments. Failed requests are never cached. Except for category
    look-ups (which fail silently), errors are reported with ``st.error`` and
    turned into an empty/``None`` return value instead of being raised.
    """

    # Sent with every request so the client identifies itself instead of
    # using the generic python-requests default.
    USER_AGENT = "WikiBotPro/1.0 (Streamlit Wikipedia assistant; python-requests)"
    # Seconds to wait for any single HTTP request.
    TIMEOUT_SECONDS = 10

    def __init__(self):
        # Both templates take the language code (e.g. "en") as the sub-domain.
        self.base_url = "https://{}.wikipedia.org/api/rest_v1"
        self.search_url = "https://{}.wikipedia.org/w/api.php"
        self.cache = {}

    def _get_cache_key(self, *args) -> str:
        """Return an MD5 hex digest of the arguments joined with ``_``."""
        key_string = "_".join(str(arg) for arg in args)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _summary_url(self, title: str, lang: str) -> str:
        """Build the REST ``page/summary`` URL for *title* on the *lang* wiki."""
        # safe="" also escapes "/", otherwise a title such as "AC/DC" would be
        # split into two path segments and the lookup would miss the page.
        encoded_title = quote(title.replace(" ", "_"), safe="")
        return f"{self.base_url.format(lang)}/page/summary/{encoded_title}"

    @staticmethod
    def _strip_namespace(category_title: str) -> str:
        """Drop the leading ``<namespace>:`` prefix from a category title.

        Works for any wiki language, unlike removing the English literal
        ``Category:``. A title without a usable prefix is returned unchanged.
        """
        return category_title.partition(":")[2] or category_title

    def _get_json(self, url: str, params: Optional[Dict] = None) -> Dict:
        """GET *url* and return the decoded JSON body; raises on HTTP errors."""
        response = requests.get(
            url,
            params=params,
            headers={"User-Agent": self.USER_AGENT},
            timeout=self.TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()

    def _action_query(self, lang: str, extra_params: Dict) -> Dict:
        """Run an Action API ``action=query`` call and return its ``query`` part."""
        params = {"action": "query", "format": "json", **extra_params}
        data = self._get_json(self.search_url.format(lang), params)
        return data.get("query", {})

    def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
        """Full-text search on the *lang* wiki.

        Returns up to *limit* raw search-result dicts, or ``[]`` on failure.
        """
        cache_key = self._get_cache_key("search", query, lang, limit)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            results = self._action_query(lang, {
                "list": "search",
                "srsearch": query,
                "srlimit": limit,
                "srprop": "snippet|titlesnippet|size|wordcount|timestamp",
            }).get("search", [])
        except Exception as e:  # UI boundary: show a message, keep the page alive
            st.error(f"Search error: {str(e)}")
            return []
        self.cache[cache_key] = results
        return results

    def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
        """Fetch the REST summary document for *title*; ``None`` on failure."""
        cache_key = self._get_cache_key("summary", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            result = self._get_json(self._summary_url(title, lang))
        except Exception as e:  # UI boundary: show a message, keep the page alive
            st.error(f"Summary error: {str(e)}")
            return None
        self.cache[cache_key] = result
        return result

    def get_page_content(self, title: str, lang: str = "en", sections: int = 3) -> Optional[str]:
        """Return the plain-text intro extract of *title*, or ``None``.

        ``sections`` is accepted for interface compatibility but is not used:
        the request always asks for the intro only (``exintro``).
        """
        cache_key = self._get_cache_key("content", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            pages = self._action_query(lang, {
                "prop": "extracts",
                "exintro": True,
                "explaintext": True,
                "exsectionformat": "plain",
                "titles": title,
                "exchars": 3000,
            }).get("pages", {})
        except Exception as e:  # UI boundary: show a message, keep the page alive
            st.error(f"Content error: {str(e)}")
            return None
        for page_data in pages.values():
            if "extract" in page_data:
                self.cache[cache_key] = page_data["extract"]
                return page_data["extract"]
        return None

    def get_random_article(self, lang: str = "en") -> Optional[Dict]:
        """Pick one random main-namespace page and return its summary document."""
        try:
            random_pages = self._action_query(lang, {
                "list": "random",
                "rnnamespace": 0,
                "rnlimit": 1,
            }).get("random", [])
            if random_pages:
                return self.get_page_summary(random_pages[0]["title"], lang)
            return None
        except Exception as e:  # UI boundary: show a message, keep the page alive
            st.error(f"Random article error: {str(e)}")
            return None

    def get_page_categories(self, title: str, lang: str = "en") -> List[str]:
        """Return up to ten category names of *title*, without namespace prefix.

        Best effort: any failure yields ``[]`` without notifying the user.
        """
        cache_key = self._get_cache_key("categories", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            pages = self._action_query(lang, {
                "prop": "categories",
                "titles": title,
                "cllimit": 10,
            }).get("pages", {})
            for page_data in pages.values():
                if "categories" in page_data:
                    names = [self._strip_namespace(cat["title"])
                             for cat in page_data["categories"]]
                    self.cache[cache_key] = names
                    return names
            return []
        except Exception:
            # Categories are decorative; a failed look-up must not disturb the card.
            return []
def init_session_state():
    """Seed ``st.session_state`` with every key the app expects but lacks.

    Keys that already exist are left untouched, so calling this on each
    rerun never overwrites values stored earlier in the session.
    """
    # Built inside the function so each call gets its own fresh mutable objects.
    defaults = {
        "search_history": [],
        "favorites": [],
        "theme": "Default",
        "user_preferences": {
            "default_language": "English",
            "results_per_page": 5,
            "summary_length": "Medium",
            "show_images": True,
            "auto_translate": False,
        },
    }
    for key, initial_value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = initial_value
def apply_theme(theme_name: str):
    """Inject a ``<style>`` element carrying the colours of the chosen theme.

    ``theme_name`` must be a key of ``THEMES``; an unknown name raises
    ``KeyError``.
    """
    palette = THEMES[theme_name]
    background = palette["background"]
    foreground = palette["text"]
    accent = palette["primary"]
    # The two-digit suffixes appended to the accent colour in the gradient are
    # hex alpha values (#RRGGBBAA).
    stylesheet = f"""
<style>
.main {{
background-color: {background};
color: {foreground};
}}
.stSelectbox label, .stTextInput label, .stSlider label {{
color: {foreground} !important;
}}
.result-card {{
background-color: {background};
border-color: {accent};
color: {foreground};
}}
.metric-card {{
background: linear-gradient(135deg, {accent}20, {accent}10);
border-left: 4px solid {accent};
}}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
def clean_html(text: str) -> str:
    """Return *text* with every ``<...>`` span deleted."""
    # Non-greedy, so each tag is matched on its own rather than swallowing
    # everything from the first "<" to the last ">". "." does not cross
    # newlines here, so a tag broken over two lines is left in place.
    return re.sub(r'<.*?>', '', text)
# Number of leading sentences kept for each summary length.
_SUMMARY_SENTENCE_LIMITS = {"Short": 2, "Medium": 4, "Long": 6}

# A sentence ends at ".", "!", "?" or the danda "।" when followed by
# whitespace or the end of the text (so "3.14" and "U.S" are not split), or
# at a CJK full-width terminator, which is normally not followed by a space.
_SENTENCE_END = re.compile(r'[.!?।]+(?=\s|$)|[。！？]+')


def summarize_text(text: str, length: str = "Medium") -> str:
    """Return the first few sentences of *text*, verbatim.

    Args:
        text: The passage to shorten.
        length: "Short" (2 sentences), "Medium" (4) or "Long" (6). Any other
            value is treated as "Long".

    Returns:
        The leading part of *text* up to and including the end of the last
        kept sentence, with original punctuation and spacing preserved.
        Text containing fewer sentences than the limit is returned whole
        (stripped); empty or whitespace-only input yields "".
    """
    text = text.strip()
    if not text:
        return ""
    limit = _SUMMARY_SENTENCE_LIMITS.get(length, _SUMMARY_SENTENCE_LIMITS["Long"])
    # Cut at the limit-th sentence boundary rather than splitting and
    # re-joining, so nothing inside the kept sentences is rewritten.
    for count, boundary in enumerate(_SENTENCE_END.finditer(text), start=1):
        if count == limit:
            return text[:boundary.end()]
    return text
def add_to_search_history(query: str, language: str, results_count: int):
    """Record a search at the front of the session's history.

    The entry stores the query, the language name, the local time of the call
    formatted as ``YYYY-MM-DD HH:MM:SS`` and the number of results. Only the
    50 most recent entries are retained.
    """
    history = st.session_state.search_history
    history.insert(0, dict(
        query=query,
        language=language,
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        results_count=results_count,
    ))
    # Newest entries come first, so trimming drops the oldest ones.
    st.session_state.search_history = history[:50]
def create_search_analytics() -> Optional[Tuple]:
    """Build the two charts shown in the "Search Analytics" section.

    Returns:
        ``None`` when the session has no search history; otherwise a
        ``(language_pie, daily_timeline)`` pair of Plotly figures built from
        ``st.session_state.search_history``.
    """
    history = st.session_state.search_history
    if not history:
        return None

    # Imported here because the file's top-level imports never bind ``px``,
    # although this function has always relied on it.
    import plotly.express as px

    df = pd.DataFrame(history)

    # Share of searches per language.
    lang_counts = df['language'].value_counts()
    fig_lang = px.pie(
        values=lang_counts.values,
        names=lang_counts.index,
        title="Search Languages Distribution",
        color_discrete_sequence=px.colors.qualitative.Set3
    )

    # Number of searches per calendar day.
    df['date'] = pd.to_datetime(df['timestamp']).dt.date
    daily_searches = df.groupby('date').size().reset_index(name='searches')
    fig_timeline = px.line(
        daily_searches,
        x='date',
        y='searches',
        title="Daily Search Activity",
        markers=True
    )
    return fig_lang, fig_timeline
def sidebar_content():
    """Render the sidebar: theme, preferences, quick actions, history, stats.

    Side effects on ``st.session_state``:
      * ``theme`` is updated (followed by an immediate rerun) when changed;
      * ``user_preferences`` gets the chosen default language / image flag;
      * ``random_article_trigger`` / ``show_analytics`` are set by the
        matching quick-action buttons;
      * ``search_history`` is emptied by "Clear History";
      * ``repeat_search`` receives the history entry whose "Repeat" button
        was pressed.
    """
    st.sidebar.title("🤖 WikiBot Pro")
    st.sidebar.markdown("---")

    # Theme selector
    st.sidebar.subheader("🎨 Appearance")
    theme_names = list(THEMES)
    theme = st.sidebar.selectbox(
        "Theme",
        options=theme_names,
        index=theme_names.index(st.session_state.theme),
        key="theme_selector"
    )
    if theme != st.session_state.theme:
        st.session_state.theme = theme
        # Restart the script so the new theme's CSS is emitted from the top.
        st.rerun()

    # User preferences
    st.sidebar.subheader("⚙️ Preferences")
    language_names = list(LANGUAGES)
    default_lang = st.sidebar.selectbox(
        "Default Language",
        options=language_names,
        index=language_names.index(st.session_state.user_preferences["default_language"])
    )
    show_images = st.sidebar.checkbox(
        "Show Images",
        value=st.session_state.user_preferences["show_images"]
    )
    st.session_state.user_preferences.update({
        "default_language": default_lang,
        "show_images": show_images
    })

    # Quick actions: each button only raises a flag that main() acts on.
    st.sidebar.subheader("🚀 Quick Actions")
    if st.sidebar.button("🎲 Random Article", use_container_width=True):
        st.session_state.random_article_trigger = True
    if st.sidebar.button("📊 Search Analytics", use_container_width=True):
        st.session_state.show_analytics = True
    if st.sidebar.button("🗑️ Clear History", use_container_width=True):
        st.session_state.search_history = []
        st.sidebar.success("History cleared!")

    # Five most recent searches
    if st.session_state.search_history:
        st.sidebar.subheader("🕐 Recent Searches")
        for i, search in enumerate(st.session_state.search_history[:5]):
            query_text = search['query']
            # Add the ellipsis only when the query really was cut off.
            label = query_text if len(query_text) <= 20 else f"{query_text[:20]}..."
            with st.sidebar.expander(label):
                st.write(f"**Language:** {search['language']}")
                st.write(f"**Time:** {search['timestamp']}")
                st.write(f"**Results:** {search['results_count']}")
                if st.button("🔄 Repeat", key=f"repeat_{i}"):
                    st.session_state.repeat_search = search

    # Statistics
    st.sidebar.subheader("📈 Statistics")
    history = st.session_state.search_history
    favorite_lang = "None"
    if history:
        lang_counter = Counter(entry['language'] for entry in history)
        favorite_lang = lang_counter.most_common(1)[0][0]
    st.sidebar.metric("Total Searches", len(history))
    st.sidebar.metric("Favorite Language", favorite_lang)
    st.sidebar.metric("Favorites Saved", len(st.session_state.favorites))
def _inject_base_css():
    """Emit the app-wide base stylesheet (fonts, cards, badges, animations)."""
    st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
html, body, [class*="css"] {
font-family: 'Inter', sans-serif;
}
.main-header {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 700;
font-size: 3rem;
margin-bottom: 0.5rem;
}
.subtitle {
text-align: center;
color: #6c757d;
font-size: 1.2rem;
margin-bottom: 2rem;
font-weight: 300;
}
.search-container {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 2rem;
border-radius: 20px;
margin-bottom: 2rem;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
backdrop-filter: blur(10px);
}
.result-card {
background: white;
padding: 1.5rem;
border-radius: 15px;
border: 1px solid #e9ecef;
margin-bottom: 1.5rem;
box-shadow: 0 5px 15px rgba(0,0,0,0.08);
transition: all 0.3s ease;
position: relative;
overflow: hidden;
}
.result-card:hover {
transform: translateY(-2px);
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
}
.result-card::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(90deg, #667eea, #764ba2);
}
.article-title {
color: #2c3e50;
font-weight: 600;
font-size: 1.3rem;
margin-bottom: 1rem;
line-height: 1.4;
}
.article-meta {
display: flex;
gap: 1rem;
margin-bottom: 1rem;
font-size: 0.9rem;
color: #6c757d;
}
.metric-card {
background: linear-gradient(135deg, #667eea20, #764ba210);
padding: 1rem;
border-radius: 10px;
border-left: 4px solid #667eea;
margin-bottom: 1rem;
transition: all 0.3s ease;
}
.metric-card:hover {
transform: scale(1.02);
}
.floating-button {
position: fixed;
bottom: 2rem;
right: 2rem;
background: linear-gradient(135deg, #667eea, #764ba2);
color: white;
border: none;
border-radius: 50%;
width: 60px;
height: 60px;
font-size: 1.5rem;
cursor: pointer;
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
transition: all 0.3s ease;
z-index: 1000;
}
.floating-button:hover {
transform: scale(1.1);
box-shadow: 0 6px 20px rgba(0,0,0,0.3);
}
.feature-badge {
display: inline-block;
background: linear-gradient(135deg, #ff9a56, #ff6b6b);
color: white;
padding: 0.25rem 0.75rem;
border-radius: 50px;
font-size: 0.8rem;
font-weight: 500;
margin: 0.25rem;
}
.category-tag {
display: inline-block;
background: #e3f2fd;
color: #1976d2;
padding: 0.25rem 0.5rem;
border-radius: 5px;
font-size: 0.8rem;
margin: 0.25rem;
}
@media (max-width: 768px) {
.main-header {
font-size: 2rem;
}
.search-container {
padding: 1rem;
}
.floating-button {
bottom: 1rem;
right: 1rem;
width: 50px;
height: 50px;
font-size: 1.2rem;
}
}
.animate-fade-in {
animation: fadeIn 0.5s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
</style>
""", unsafe_allow_html=True)


def main():
    """Render the whole page: styles, sidebar, header, search UI and footer.

    One-shot flags raised by the sidebar (``random_article_trigger``,
    ``show_analytics``, ``repeat_search``) are consumed and removed here.
    """
    init_session_state()

    # Base stylesheet first, theme second: both style `.result-card` and
    # `.metric-card` with equal specificity, so whichever is emitted later
    # wins. The theme has to come last to have any effect on those cards.
    _inject_base_css()
    apply_theme(st.session_state.theme)

    # Sidebar
    sidebar_content()

    # Main header
    st.markdown("<h1 class='main-header'>🤖 WikiBot Pro</h1>", unsafe_allow_html=True)
    st.markdown("<p class='subtitle'>AI-Powered Multilingual Wikipedia Assistant with Advanced Features</p>", unsafe_allow_html=True)

    # Feature highlights
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.markdown("<div class='metric-card'><h3>🌍</h3><p><strong>20+ Languages</strong><br>Multilingual Support</p></div>", unsafe_allow_html=True)
    with col2:
        st.markdown("<div class='metric-card'><h3>🤖</h3><p><strong>AI-Powered</strong><br>Smart Summaries</p></div>", unsafe_allow_html=True)
    with col3:
        st.markdown("<div class='metric-card'><h3>⚡</h3><p><strong>Fast Search</strong><br>Cached Results</p></div>", unsafe_allow_html=True)
    with col4:
        st.markdown("<div class='metric-card'><h3>📊</h3><p><strong>Analytics</strong><br>Search Insights</p></div>", unsafe_allow_html=True)

    # Keep one API client per session. Building a new one on every rerun
    # would start with an empty cache each time, so nothing would ever be
    # served from it.
    if "wiki_api" not in st.session_state:
        st.session_state.wiki_api = WikipediaAPI()
    wiki_api = st.session_state.wiki_api

    default_language = st.session_state.user_preferences["default_language"]

    # Handle random article trigger
    if "random_article_trigger" in st.session_state:
        default_code = LANGUAGES[default_language]["code"]
        with st.spinner("🎲 Finding a random article..."):
            random_article = wiki_api.get_random_article(default_code)
        if random_article:
            st.success("🎲 Random Article Discovery!")
            display_article_card(random_article, wiki_api, 0, default_code)
        del st.session_state["random_article_trigger"]

    # Handle analytics display
    if "show_analytics" in st.session_state:
        st.subheader("📊 Search Analytics")
        analytics = create_search_analytics()
        if analytics:
            col1, col2 = st.columns(2)
            with col1:
                st.plotly_chart(analytics[0], use_container_width=True)
            with col2:
                st.plotly_chart(analytics[1], use_container_width=True)
        else:
            st.info("No search history available for analytics.")
        del st.session_state["show_analytics"]

    # A "Repeat" click pre-fills the query box. The text is written into the
    # widget's own session-state slot *before* the widget is created, so it
    # survives the following rerun (e.g. the click on the search button);
    # passing it through `value=` on an unkeyed widget did not.
    if "repeat_search" in st.session_state:
        st.session_state["search_query"] = st.session_state.repeat_search.get("query", "")
        del st.session_state["repeat_search"]

    # Search interface
    st.markdown("<div class='search-container'>", unsafe_allow_html=True)
    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input(
            "🔍 Search Wikipedia",
            placeholder="e.g., 'Artificial Intelligence', 'కృష్ణ నది', 'गांधी जी'",
            help="Enter your search query in any language",
            key="search_query"
        )
    with col2:
        # Options are the plain language names; the flag and native name are
        # added for display only, so no label parsing is needed afterwards.
        language_names = list(LANGUAGES)
        selected_lang = st.selectbox(
            "🌍 Language",
            options=language_names,
            index=language_names.index(default_language),
            format_func=lambda name: f"{LANGUAGES[name]['flag']} {name} ({LANGUAGES[name]['native']})"
        )

    # Advanced options
    with st.expander("⚙️ Advanced Search Options"):
        col1, col2, col3 = st.columns(3)
        with col1:
            num_results = st.slider("📄 Number of results", 1, 15,
                                    st.session_state.user_preferences["results_per_page"])
        with col2:
            length_choices = ["Short", "Medium", "Long"]
            summary_length = st.selectbox(
                "📝 Summary length",
                length_choices,
                index=length_choices.index(st.session_state.user_preferences["summary_length"])
            )
        with col3:
            # NOTE(review): the chosen mode is not used by the search below.
            search_mode = st.selectbox("🔍 Search mode",
                                       ["Standard", "Recent", "Popular"])
    st.markdown("</div>", unsafe_allow_html=True)

    # Search button
    # NOTE(review): result cards (and their Favorite/Share buttons) are only
    # drawn in the run where this button was pressed; pressing a card button
    # starts a new run in which this branch is skipped — confirm intended.
    if st.button("🔎 Search Wikipedia", type="primary", use_container_width=True):
        if query:
            lang_code = LANGUAGES[selected_lang]["code"]
            with st.spinner(f"🔍 Searching Wikipedia in {selected_lang}..."):
                search_results = wiki_api.search_articles(query, lang_code, num_results)
            if search_results:
                add_to_search_history(query, selected_lang, len(search_results))
                st.success(f"✅ Found {len(search_results)} results in {selected_lang}")
                for idx, result in enumerate(search_results):
                    display_article_card(result, wiki_api, idx, lang_code, summary_length)
            else:
                st.warning(f"❌ No results found for '{query}' in {selected_lang}")
                # Suggest alternative searches
                st.info("💡 **Suggestions:**")
                st.write("• Try different keywords or phrases")
                st.write("• Switch to a different language")
                st.write("• Check spelling and try simpler terms")
                st.write("• Use the Random Article feature to explore")
        else:
            st.warning("⚠️ Please enter a search query")

    # Footer with enhanced information
    st.markdown("---")
    st.markdown("### 🌟 WikiBot Pro Features")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
**🌍 Multilingual Support**
- 20+ languages including Indian languages
- Native script support
- Cultural context awareness
""")
    with col2:
        st.markdown("""
**🤖 AI-Powered Features**
- Smart text summarization
- Intelligent caching
- Personalized recommendations
""")
    with col3:
        st.markdown("""
**📊 Advanced Analytics**
- Search history tracking
- Language usage patterns
- Performance insights
""")
def display_article_card(result: Dict, wiki_api: WikipediaAPI, idx: int, lang_code: str, summary_length: str = "Medium"):
    """Render one article as a card: title, metadata, summary, categories, link.

    Args:
        result: A dict with at least a "title"; "wordcount", "size",
            "timestamp" and "snippet" are used when present.
        wiki_api: Client used to fetch the summary and the categories.
        idx: Zero-based position; shown as ``idx + 1`` and used to build the
            keys of the card's buttons.
        lang_code: Wikipedia language code, e.g. "en".
        summary_length: "Short", "Medium" or "Long"; passed on to the
            summary renderer.
    """
    # Function-scope import: used only here, to escape text placed in raw HTML.
    import html

    st.markdown("<div class='result-card animate-fade-in'>", unsafe_allow_html=True)
    title = result.get("title", "")

    # Article header with metadata
    col1, col2 = st.columns([3, 1])
    with col1:
        # The title goes into markup rendered with unsafe_allow_html, so
        # characters such as "&" must be escaped to display literally.
        st.markdown(f"<div class='article-title'>{idx+1}. {html.escape(title)}</div>", unsafe_allow_html=True)
        wordcount = result.get("wordcount", 0)
        size = result.get("size", 0)
        timestamp = result.get("timestamp", "")
        meta_html = f"""
<div class='article-meta'>
<span>📝 {wordcount} words</span>
<span>📊 {size} bytes</span>
<span>🕐 {timestamp[:10] if timestamp else 'Unknown'}</span>
</div>
"""
        st.markdown(meta_html, unsafe_allow_html=True)
    with col2:
        # Action buttons
        if st.button("⭐ Favorite", key=f"fav_{idx}"):
            if title not in st.session_state.favorites:
                st.session_state.favorites.append(title)
                st.success("Added to favorites!")
        if st.button("🔗 Share", key=f"share_{idx}"):
            st.info(f"Share this article: {title}")

    # Get detailed summary
    summary_data = wiki_api.get_page_summary(title, lang_code)
    if summary_data:
        # Display with image if available
        if st.session_state.user_preferences["show_images"] and "thumbnail" in summary_data:
            col1, col2 = st.columns([1, 3])
            with col1:
                st.image(summary_data["thumbnail"]["source"], width=150, caption="Wikipedia Image")
            with col2:
                display_article_content(summary_data, summary_length)
        else:
            display_article_content(summary_data, summary_length)

        # Categories
        categories = wiki_api.get_page_categories(title, lang_code)
        if categories:
            st.markdown("**📚 Categories:**")
            for cat in categories[:5]:  # Show only first 5 categories
                st.markdown(f"<span class='category-tag'>{html.escape(cat)}</span>", unsafe_allow_html=True)

        # Percent-encode the title: a raw "?", "#" or "%" would otherwise be
        # read as URL syntax and the link would open a different page.
        encoded_title = quote(title.replace(' ', '_'), safe='')
        wiki_url = f"https://{lang_code}.wikipedia.org/wiki/{encoded_title}"
        st.markdown(f"🔗 [Read full article on Wikipedia]({wiki_url})")
    else:
        # Fallback to search snippet
        snippet = result.get("snippet", "No summary available")
        st.write(clean_html(snippet))
    st.markdown("</div>", unsafe_allow_html=True)
def display_article_content(summary_data: Dict, summary_length: str):
    """Write the shortened extract and, if present, the page's coordinates.

    ``summary_data`` is read for an "extract" string and an optional
    "coordinates" mapping with "lat" / "lon" entries.
    """
    body_text = summary_data.get("extract", "")
    if body_text:
        st.write(summarize_text(body_text, summary_length))

    # Display additional info if available
    if "coordinates" in summary_data:
        location = summary_data["coordinates"]
        latitude = location.get('lat', 0)
        longitude = location.get('lon', 0)
        st.info(f"📍 Location: {latitude:.4f}, {longitude:.4f}")
# Script entry point: build the page when this file is executed as the main
# module.
if __name__ == "__main__":
    main()