Spaces:
Build error
Build error
Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""app.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1Yo-pZqBc-DxiTzP9TyKKFInBqeeQrlTb
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import json
|
| 12 |
+
import asyncio
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from typing import Dict, List, Any
|
| 15 |
+
|
| 16 |
+
import streamlit as st
|
| 17 |
+
|
| 18 |
+
from helper import ChatBot, current_year, save_to_audio, invoke_duckduckgo_news_search
|
| 19 |
+
|
| 20 |
+
# ============================ FRONT-END SETUP ============================
|
| 21 |
+
|
| 22 |
+
st.set_page_config(layout="wide") # Set Streamlit layout to wide mode
|
| 23 |
+
st.title("SearchBot π€") # App title
|
| 24 |
+
|
| 25 |
+
# ============================ SIDEBAR SETTINGS ============================
|
| 26 |
+
|
| 27 |
+
with st.sidebar:
|
| 28 |
+
with st.expander("π Instruction Manual"):
|
| 29 |
+
st.markdown(
|
| 30 |
+
"""
|
| 31 |
+
## π§ SearchBot π€ - Your AI-Powered Research Assistant
|
| 32 |
+
Welcome to **SearchBot**, an advanced AI assistant that helps you find the latest news, trends, and information
|
| 33 |
+
across various sources.
|
| 34 |
+
|
| 35 |
+
### πΉ How to Use:
|
| 36 |
+
1. **π Choose Search Source**
|
| 37 |
+
- Select the type of search (News, Research Papers, Web Articles).
|
| 38 |
+
2. **π Choose Number of Results**
|
| 39 |
+
- Decide how many results you want (1 to 10).
|
| 40 |
+
3. **π Set Location**
|
| 41 |
+
- Customize search results based on location.
|
| 42 |
+
*(e.g., "us-en" for USA, "in-en" for India)*
|
| 43 |
+
4. **β³ Filter by Time**
|
| 44 |
+
- Search for the most recent news or past articles:
|
| 45 |
+
- **Past Day** π (Breaking News)
|
| 46 |
+
- **Past Week** π (Trending Topics)
|
| 47 |
+
- **Past Month** π
(Major Stories)
|
| 48 |
+
- **Past Year** οΏ½οΏ½οΏ½οΏ½ (Deep Research)
|
| 49 |
+
5. **π¬ Review Search Results & Chat History**
|
| 50 |
+
- View results in an interactive table.
|
| 51 |
+
- Chatbot provides summarized responses with references.
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
### πΉ Live Examples You Can Try:
|
| 56 |
+
**π° Find Latest News**
|
| 57 |
+
- *"What are the latest AI breakthroughs?"*
|
| 58 |
+
- *"Recent developments in space exploration."*
|
| 59 |
+
|
| 60 |
+
**π Research Papers & Analysis**
|
| 61 |
+
- *"Most cited papers on quantum computing."*
|
| 62 |
+
- *"Deep learning advancements in 2024."*
|
| 63 |
+
|
| 64 |
+
**π Location-Based Information**
|
| 65 |
+
- *"Tech news in Silicon Valley."*
|
| 66 |
+
- *"Political updates in the UK."*
|
| 67 |
+
|
| 68 |
+
**β‘ AI-Powered Chatbot Insights**
|
| 69 |
+
- *"Summarize recent news on cryptocurrency."*
|
| 70 |
+
- *"Give me top AI news from last week with analysis."*
|
| 71 |
+
|
| 72 |
+
"""
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# User inputs for search customization
|
| 76 |
+
num: int = st.number_input("π Number of results", value=7, step=1, min_value=1, max_value=10)
|
| 77 |
+
location: str = st.text_input("π Location (e.g., us-en, in-en)", value="us-en")
|
| 78 |
+
time_filter: str = st.selectbox(
|
| 79 |
+
"β³ Time filter",
|
| 80 |
+
["Past Day", "Past Week", "Past Month", "Past Year"],
|
| 81 |
+
index=1
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
# Convert time filter to DuckDuckGo-compatible format
|
| 85 |
+
time_mapping: Dict[str, str] = {"Past Day": "d", "Past Week": "w", "Past Month": "m", "Past Year": "y"}
|
| 86 |
+
time_filter = time_mapping[time_filter]
|
| 87 |
+
|
| 88 |
+
only_use_chatbot: bool = st.checkbox("π¬ Only use chatbot (Disable Search)")
|
| 89 |
+
|
| 90 |
+
# Clear chat history button
|
| 91 |
+
if st.button("π§Ή Clear Session"):
|
| 92 |
+
st.session_state.messages = []
|
| 93 |
+
st.rerun()
|
| 94 |
+
|
| 95 |
+
# Footer with dynamic year
|
| 96 |
+
st.markdown(f"<h6>π
Copyright Β© 2010-{current_year()} Present</h6>", unsafe_allow_html=True)
|
| 97 |
+
|
| 98 |
+
# ============================ CHAT HISTORY SETUP ============================
|
| 99 |
+
|
| 100 |
+
# Seed the per-session transcript the first time the script runs for a session.
if "messages" not in st.session_state:
    st.session_state.messages: List[Dict[str, str]] = []

# Discard the stored transcript unless it is a list made up only of dicts.
stored_history = st.session_state.messages
history_is_valid = isinstance(stored_history, list) and all(
    isinstance(entry, dict) for entry in stored_history
)
if not history_is_valid:
    st.session_state.messages = []

# Replay every stored message in a chat bubble keyed by its role.
for message in st.session_state.messages:
    bubble = st.chat_message(message["role"])
    with bubble:
        st.markdown(message["content"])
|
| 112 |
+
|
| 113 |
+
# ============================ CHAT INPUT & PROCESSING ============================
|
| 114 |
+
|
| 115 |
+
# Process user input in the chatbox
|
| 116 |
+
if prompt := st.chat_input("Ask anything!"):
|
| 117 |
+
st.chat_message("user").markdown(prompt)
|
| 118 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 119 |
+
|
| 120 |
+
# **Initialize ref_table_string to hold search results**
|
| 121 |
+
ref_table_string: str = "**No references found.**"
|
| 122 |
+
|
| 123 |
+
try:
|
| 124 |
+
with st.spinner("Searching..."): # Show loading spinner
|
| 125 |
+
if only_use_chatbot:
|
| 126 |
+
response: str = "<empty>"
|
| 127 |
+
else:
|
| 128 |
+
# **Call async search function using `asyncio.run()`**
|
| 129 |
+
search_results: Dict[str, Any] = asyncio.run(
|
| 130 |
+
invoke_duckduckgo_news_search(query=prompt, location=location, num=num, time_filter=time_filter)
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if search_results["status"] == "success":
|
| 134 |
+
md_data: List[Dict[str, Any]] = search_results["results"]
|
| 135 |
+
response = f"Here are your search results:\n{md_data}"
|
| 136 |
+
|
| 137 |
+
def clean_title(title: str) -> str:
    """
    Make a title safe to place inside a single Markdown table cell.

    A literal '|' would be parsed as a column separator and a line break would
    end the table row, so every '|' becomes ' - ' and every run of whitespace
    (spaces, tabs, newlines) is collapsed to a single space.

    Args:
        title (str): The original title.

    Returns:
        str: The single-line title with '|' replaced by ' - ' and no
        leading, trailing or repeated whitespace.
    """
    # split()/join trims both ends and also removes the doubled spaces that
    # replacing " | " with " - " would otherwise leave behind.
    return " ".join(title.replace("|", " - ").split())
|
| 148 |
+
|
| 149 |
+
def generate_star_rating(rating: str) -> str:
    """
    Converts a numeric rating into a star representation (supports half-stars).

    Every whole point becomes one star. A fractional part of 0.5 or more adds
    a single '½' marker, so "3.5" is rendered as three stars followed by '½'
    (not as four stars plus '½', which would read as 4.5).

    Args:
        rating (str): The rating value as a string.

    Returns:
        str: The star string for the rating (empty for a rating below 0.5),
        or "N/A" when the value is missing, non-numeric, NaN, infinite or
        negative.
    """
    # Written as escapes so a wrong file encoding cannot garble the glyphs.
    star = "\u2b50"  # U+2B50 WHITE MEDIUM STAR
    half = "\u00bd"  # U+00BD VULGAR FRACTION ONE HALF

    try:
        rating_float: float = float(rating)
        # int() raises ValueError for NaN and OverflowError for infinity.
        full_stars: int = int(rating_float)
    except (TypeError, ValueError, OverflowError):
        return "N/A"  # Fallback for missing or non-numeric ratings

    if rating_float < 0:
        return "N/A"  # A negative rating has no star representation

    half_star: str = half if (rating_float - full_stars) >= 0.5 else ""
    return star * full_stars + half_star
|
| 166 |
+
|
| 167 |
+
# Build the reference table as a list of Markdown rows and join them once at
# the end instead of growing a string with += on every iteration.
ref_rows = [
    "| Num | Title | Rating | Context |",
    "|---|------|--------|---------|",
]

for position, res in enumerate(md_data, start=1):
    # A '|' in the title would be read as a column separator, so clean it.
    # `or "Untitled"` keeps one hit without a title from aborting the answer.
    title_cleaned = clean_title(str(res.get("title") or "Untitled"))

    # Only values that look like a plain non-negative number are converted to
    # stars; text such as a source name (e.g. "MIT News") is shown as "N/A".
    raw_rating = str(res.get("rating", "N/A")).strip()
    if raw_rating.replace(".", "", 1).isdigit():
        stars = generate_star_rating(raw_rating)
    else:
        stars = "N/A"

    # `or ""` also covers a "link" key that is present but None, which would
    # otherwise fail on .startswith().
    link = str(res.get("link") or "")
    if link.startswith("http"):
        title = f"[{title_cleaned}]({link})"  # Clickable title
    else:
        title = title_cleaned  # Fallback to text-only title

    # Flatten the summary to one line without '|' before truncating it:
    # a pipe or a line break inside the cell would break the table row.
    context_summary = " ".join(
        str(res.get("summary") or "").replace("|", " - ").split()
    )
    if len(context_summary) > 100:
        summary = context_summary[:100] + "..."
    else:
        summary = context_summary

    # Fall back to the running position when the hit carries no "num".
    ref_rows.append(f"| {res.get('num', position)} | {title} | {stars} | {summary} |")

# Every row, including the last one, is terminated by a newline.
ref_table_string = "\n".join(ref_rows) + "\n"
|
| 195 |
+
|
| 196 |
+
# Generate the chatbot response from the search results or the chat history.
bot = ChatBot()
bot.history = st.session_state.messages.copy()

# `response` and `search_results` are only assigned on some of the earlier
# branches: `search_results` is never set in chatbot-only mode and `response`
# is never set when the search status is not "success". Reading them
# unconditionally raised NameError, which the surrounding handler reported as
# a generic failure. Fall back to the same placeholders used for "no search".
# NOTE(review): initialising both names before the branch would be cleaner,
# but that code is outside this block.
try:
    search_text = response
except NameError:
    search_text = "<empty>"

try:
    search_hits = search_results.get("results", [])
except NameError:
    search_hits = []

# .get() so that one hit without a summary does not abort the whole answer.
context_items = [hit.get("summary", "") for hit in search_hits]

response = bot.generate_response(
    f"""
User prompt: {prompt}
Search results: {search_text}
Context: {context_items}
If search results exist, use them for the answer.
Otherwise, generate a response based on chat history.
"""
)
|
| 208 |
+
|
| 209 |
+
except Exception as e:
|
| 210 |
+
st.warning(f"Error fetching data: {e}")
|
| 211 |
+
response = "We encountered an issue. Please try again later."
|
| 212 |
+
|
| 213 |
+
# **Convert response to audio**
|
| 214 |
+
save_to_audio(response)
|
| 215 |
+
|
| 216 |
+
# **Display assistant response in chat UI**
|
| 217 |
+
with st.chat_message("assistant"):
|
| 218 |
+
st.markdown(response, unsafe_allow_html=True)
|
| 219 |
+
st.audio("output.mp3", format="audio/mpeg", loop=True)
|
| 220 |
+
with st.expander("References:", expanded=True):
|
| 221 |
+
st.markdown(ref_table_string, unsafe_allow_html=True)
|
| 222 |
+
|
| 223 |
+
# **Update chat history with final response**
|
| 224 |
+
final_response: str = f"{response}\n\n{ref_table_string}"
|
| 225 |
+
st.session_state.messages.append({"role": "assistant", "content": final_response})
|
| 226 |
+
|