DKethan committed on
Commit
a95ab01
·
verified ·
1 Parent(s): 0c40909

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ output.mp3 filter=lfs diff=lfs merge=lfs -text
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="55">
8
+ <item index="0" class="java.lang.String" itemvalue="notebook_shim" />
9
+ <item index="1" class="java.lang.String" itemvalue="jupyterlab_widgets" />
10
+ <item index="2" class="java.lang.String" itemvalue="greenlet" />
11
+ <item index="3" class="java.lang.String" itemvalue="arrow" />
12
+ <item index="4" class="java.lang.String" itemvalue="Babel" />
13
+ <item index="5" class="java.lang.String" itemvalue="nbclient" />
14
+ <item index="6" class="java.lang.String" itemvalue="QtPy" />
15
+ <item index="7" class="java.lang.String" itemvalue="executing" />
16
+ <item index="8" class="java.lang.String" itemvalue="tinycss2" />
17
+ <item index="9" class="java.lang.String" itemvalue="pyzmq" />
18
+ <item index="10" class="java.lang.String" itemvalue="bleach" />
19
+ <item index="11" class="java.lang.String" itemvalue="jupyter_server_terminals" />
20
+ <item index="12" class="java.lang.String" itemvalue="lxml" />
21
+ <item index="13" class="java.lang.String" itemvalue="prometheus_client" />
22
+ <item index="14" class="java.lang.String" itemvalue="notebook" />
23
+ <item index="15" class="java.lang.String" itemvalue="qtconsole" />
24
+ <item index="16" class="java.lang.String" itemvalue="terminado" />
25
+ <item index="17" class="java.lang.String" itemvalue="comm" />
26
+ <item index="18" class="java.lang.String" itemvalue="isoduration" />
27
+ <item index="19" class="java.lang.String" itemvalue="jupyter-lsp" />
28
+ <item index="20" class="java.lang.String" itemvalue="IMDbPY" />
29
+ <item index="21" class="java.lang.String" itemvalue="fqdn" />
30
+ <item index="22" class="java.lang.String" itemvalue="jupyter_client" />
31
+ <item index="23" class="java.lang.String" itemvalue="JustWatch" />
32
+ <item index="24" class="java.lang.String" itemvalue="ipykernel" />
33
+ <item index="25" class="java.lang.String" itemvalue="jupyterlab_server" />
34
+ <item index="26" class="java.lang.String" itemvalue="nbconvert" />
35
+ <item index="27" class="java.lang.String" itemvalue="psutil" />
36
+ <item index="28" class="java.lang.String" itemvalue="jupyterlab_pygments" />
37
+ <item index="29" class="java.lang.String" itemvalue="asttokens" />
38
+ <item index="30" class="java.lang.String" itemvalue="widgetsnbextension" />
39
+ <item index="31" class="java.lang.String" itemvalue="matplotlib-inline" />
40
+ <item index="32" class="java.lang.String" itemvalue="webcolors" />
41
+ <item index="33" class="java.lang.String" itemvalue="json5" />
42
+ <item index="34" class="java.lang.String" itemvalue="SQLAlchemy" />
43
+ <item index="35" class="java.lang.String" itemvalue="pandocfilters" />
44
+ <item index="36" class="java.lang.String" itemvalue="wcwidth" />
45
+ <item index="37" class="java.lang.String" itemvalue="types-python-dateutil" />
46
+ <item index="38" class="java.lang.String" itemvalue="uri-template" />
47
+ <item index="39" class="java.lang.String" itemvalue="jupyter" />
48
+ <item index="40" class="java.lang.String" itemvalue="stack-data" />
49
+ <item index="41" class="java.lang.String" itemvalue="jupyterlab" />
50
+ <item index="42" class="java.lang.String" itemvalue="ipywidgets" />
51
+ <item index="43" class="java.lang.String" itemvalue="prompt_toolkit" />
52
+ <item index="44" class="java.lang.String" itemvalue="parso" />
53
+ <item index="45" class="java.lang.String" itemvalue="nbformat" />
54
+ <item index="46" class="java.lang.String" itemvalue="ipython" />
55
+ <item index="47" class="java.lang.String" itemvalue="jsonpointer" />
56
+ <item index="48" class="java.lang.String" itemvalue="Send2Trash" />
57
+ <item index="49" class="java.lang.String" itemvalue="overrides" />
58
+ <item index="50" class="java.lang.String" itemvalue="mistune" />
59
+ <item index="51" class="java.lang.String" itemvalue="pytube" />
60
+ <item index="52" class="java.lang.String" itemvalue="jupyter-console" />
61
+ <item index="53" class="java.lang.String" itemvalue="debugpy" />
62
+ <item index="54" class="java.lang.String" itemvalue="argon2-cffi" />
63
+ </list>
64
+ </value>
65
+ </option>
66
+ </inspection_tool>
67
+ </profile>
68
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="searchbot-001" />
5
+ </component>
6
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/searchbot.iml" filepath="$PROJECT_DIR$/.idea/searchbot.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/searchbot.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="searchbot-001" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/workspace.xml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AutoImportSettings">
4
+ <option name="autoReloadType" value="SELECTIVE" />
5
+ </component>
6
+ <component name="ChangeListManager">
7
+ <list default="true" id="0c898110-30f4-4a5c-b755-e3a433ebce38" name="Changes" comment="" />
8
+ <option name="SHOW_DIALOG" value="false" />
9
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
10
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
11
+ <option name="LAST_RESOLUTION" value="IGNORE" />
12
+ </component>
13
+ <component name="FileTemplateManagerImpl">
14
+ <option name="RECENT_TEMPLATES">
15
+ <list>
16
+ <option value="Python Script" />
17
+ </list>
18
+ </option>
19
+ </component>
20
+ <component name="ProjectColorInfo"><![CDATA[{
21
+ "associatedIndex": 3
22
+ }]]></component>
23
+ <component name="ProjectId" id="2sk4uY24GzIoOyfaI4SvqFInupk" />
24
+ <component name="ProjectViewState">
25
+ <option name="hideEmptyMiddlePackages" value="true" />
26
+ <option name="showLibraryContents" value="true" />
27
+ </component>
28
+ <component name="PropertiesComponent"><![CDATA[{
29
+ "keyToString": {
30
+ "Python.helper.executor": "Run",
31
+ "RunOnceActivity.ShowReadmeOnStart": "true",
32
+ "node.js.detected.package.eslint": "true",
33
+ "node.js.detected.package.tslint": "true",
34
+ "node.js.selected.package.eslint": "(autodetect)",
35
+ "node.js.selected.package.tslint": "(autodetect)",
36
+ "nodejs_package_manager_path": "npm",
37
+ "vue.rearranger.settings.migration": "true"
38
+ }
39
+ }]]></component>
40
+ <component name="SharedIndexes">
41
+ <attachedChunks>
42
+ <set>
43
+ <option value="bundled-js-predefined-d6986cc7102b-deb605915726-JavaScript-PY-243.22562.220" />
44
+ <option value="bundled-python-sdk-0fc6c617c4bd-9a18a617cbe4-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-243.22562.220" />
45
+ </set>
46
+ </attachedChunks>
47
+ </component>
48
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
49
+ <component name="TaskManager">
50
+ <task active="true" id="Default" summary="Default task">
51
+ <changelist id="0c898110-30f4-4a5c-b755-e3a433ebce38" name="Changes" comment="" />
52
+ <created>1738986461767</created>
53
+ <option name="number" value="Default" />
54
+ <option name="presentableId" value="Default" />
55
+ <updated>1738986461767</updated>
56
+ <workItem from="1738986462803" duration="703000" />
57
+ </task>
58
+ <servers />
59
+ </component>
60
+ <component name="TypeScriptGeneratedFilesManager">
61
+ <option name="version" value="3" />
62
+ </component>
63
+ <component name="com.intellij.coverage.CoverageDataManagerImpl">
64
+ <SUITE FILE_PATH="coverage/searchbot$helper.coverage" NAME="helper Coverage Results" MODIFIED="1738987126236" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
65
+ </component>
66
+ </project>
__pycache__/helper.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import asyncio
4
+ from datetime import datetime
5
+ from typing import Dict, List, Any
6
+
7
+ import streamlit as st
8
+
9
+ from helper import ChatBot, current_year, save_to_audio, invoke_duckduckgo_news_search
10
+
11
# ============================ FRONT-END SETUP ============================

st.set_page_config(layout="wide")  # Use the full browser width
st.title("SearchBot 🤖")  # App title

# ============================ SIDEBAR SETTINGS ============================

with st.sidebar:
    with st.expander("📖 Instruction Manual"):
        # The manual text is kept flush-left inside the literal so Markdown
        # never mistakes indented lines for a code block.
        st.markdown(
            """
## 🧠 SearchBot 🤖 - Your AI-Powered Research Assistant
Welcome to **SearchBot**, an advanced AI assistant that helps you find the latest news, trends, and information
across various sources.

### 🔹 How to Use:
1. **📌 Choose Search Source**
   - Select the type of search (News, Research Papers, Web Articles).
2. **📊 Choose Number of Results**
   - Decide how many results you want (1 to 10).
3. **🌍 Set Location**
   - Customize search results based on location.
     *(e.g., "us-en" for USA, "in-en" for India)*
4. **⏳ Filter by Time**
   - Search for the most recent news or past articles:
     - **Past Day** 🕐 (Breaking News)
     - **Past Week** 🗓 (Trending Topics)
     - **Past Month** 📅 (Major Stories)
     - **Past Year** 📆 (Deep Research)
5. **💬 Review Search Results & Chat History**
   - View results in an interactive table.
   - Chatbot provides summarized responses with references.

---

### 🔹 Live Examples You Can Try:
**📰 Find Latest News**
- *"What are the latest AI breakthroughs?"*
- *"Recent developments in space exploration."*

**📖 Research Papers & Analysis**
- *"Most cited papers on quantum computing."*
- *"Deep learning advancements in 2024."*

**🌍 Location-Based Information**
- *"Tech news in Silicon Valley."*
- *"Political updates in the UK."*

**⚡ AI-Powered Chatbot Insights**
- *"Summarize recent news on cryptocurrency."*
- *"Give me top AI news from last week with analysis."*

"""
        )

    # User inputs for search customization
    num: int = st.number_input("📊 Number of results", value=7, step=1, min_value=1, max_value=10)
    location: str = st.text_input("🌍 Location (e.g., us-en, in-en)", value="us-en")

    # Sidebar label -> one-letter code handed to the search helper as `time_filter`
    time_mapping: Dict[str, str] = {"Past Day": "d", "Past Week": "w", "Past Month": "m", "Past Year": "y"}
    time_filter: str = st.selectbox(
        "⏳ Time filter",
        list(time_mapping),
        index=1,  # default to "Past Week"
    )
    # From here on `time_filter` holds the short code, not the label
    time_filter = time_mapping[time_filter]

    only_use_chatbot: bool = st.checkbox("💬 Only use chatbot (Disable Search)")

    # Clear chat history and redraw the page with an empty conversation
    if st.button("🧹 Clear Session"):
        st.session_state.messages = []
        st.rerun()

    # Footer with dynamic year
    st.markdown(f"<h6>📅 Copyright © 2010-{current_year()} Present</h6>", unsafe_allow_html=True)
88
+
89
# ============================ CHAT HISTORY SETUP ============================

# First run of the session: start with an empty conversation
if "messages" not in st.session_state:
    st.session_state.messages = []

# The history must be a list whose entries are all dictionaries; anything else is discarded
history_is_valid = isinstance(st.session_state.messages, list) and all(
    isinstance(entry, dict) for entry in st.session_state.messages
)
if not history_is_valid:
    st.session_state.messages = []

# Replay the stored conversation in the Streamlit chat UI
for past_message in st.session_state.messages:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])
103
+
104
# ============================ CHAT INPUT & PROCESSING ============================

# Header rows of the Markdown reference table shown under each answer
REFERENCE_TABLE_HEADER: str = "| Num | Title | Rating | Context |\n|---|------|--------|---------|\n"


def clean_title(title: str) -> str:
    """
    Make a piece of scraped text safe to place inside a Markdown table cell.

    Args:
        title (str): The original text.

    Returns:
        str: The text with '|' replaced by ' - ' and every run of whitespace
        (including newlines, which would end the table row) collapsed to one space.
    """
    return " ".join(title.replace("|", " - ").split())


def generate_star_rating(rating: str) -> str:
    """
    Convert a numeric rating into a star representation (supports half-stars).

    Args:
        rating (str): The rating value as a string.

    Returns:
        str: One ⭐ per whole point, followed by '½' when the fractional part
        is at least 0.5, or "N/A" when the value is not a finite number.
    """
    try:
        rating_float = float(rating)
        full_stars = int(rating_float)  # raises for NaN / infinity
    except (TypeError, ValueError, OverflowError):
        return "N/A"
    # Only the fraction marker is appended; the whole stars are already counted above.
    half_star = "½" if (rating_float - full_stars) >= 0.5 else ""
    return "⭐" * full_stars + half_star


def _build_reference_table(results: List[Dict[str, Any]]) -> str:
    """Render the search results as a Markdown table (one row per article)."""
    rows: List[str] = [REFERENCE_TABLE_HEADER]
    for res in results:
        title_cleaned = clean_title(str(res.get("title", "")))

        # Ratings may be non-numeric text (e.g. "Error"); only numbers become stars.
        raw_rating = str(res.get("rating", "N/A")).strip()
        stars = generate_star_rating(raw_rating) if raw_rating.replace(".", "", 1).isdigit() else "N/A"

        # Link the title only when a real URL is available.
        link = str(res.get("link", ""))
        title = f"[{title_cleaned}]({link})" if link.startswith("http") else title_cleaned

        # Context column is limited to 100 characters.
        context_summary = clean_title(str(res.get("summary", "")))
        summary = context_summary[:100] + "..." if len(context_summary) > 100 else context_summary

        rows.append(f"| {res.get('num', '')} | {title} | {stars} | {summary} |\n")
    return "".join(rows)


# Process user input in the chatbox
if prompt := st.chat_input("Ask anything!"):
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    ref_table_string: str = "**No references found.**"

    # Defaults cover chatbot-only mode and failed searches, so the prompt
    # below never touches an unbound name.
    search_results: Dict[str, Any] = {}
    search_context: str = "<empty>"

    try:
        with st.spinner("Searching..."):  # Show loading spinner
            if not only_use_chatbot:
                search_results = asyncio.run(
                    invoke_duckduckgo_news_search(query=prompt, location=location, num=num, time_filter=time_filter)
                )

                if search_results.get("status") == "success":
                    md_data: List[Dict[str, Any]] = search_results["results"]
                    search_context = f"Here are your search results:\n{md_data}"
                    ref_table_string = _build_reference_table(md_data)

            summaries = [res.get("summary", "") for res in search_results.get("results", [])]

            # Generate the chatbot response from the search results or, failing that, the chat history
            bot = ChatBot()
            bot.history = st.session_state.messages.copy()
            response: str = bot.generate_response(
                f"""
User prompt: {prompt}
Search results: {search_context}
Context: {summaries}
If search results exist, use them for the answer.
Otherwise, generate a response based on chat history.
"""
            )

    except Exception as e:  # top-level UI boundary: report and keep the app alive
        st.warning(f"Error fetching data: {e}")
        response = "We encountered an issue. Please try again later."

    # Convert response to audio
    save_to_audio(response)

    # Display assistant response in chat UI.
    # Model output and scraped text are untrusted, so raw HTML rendering stays off.
    with st.chat_message("assistant"):
        st.markdown(response)
        if os.path.exists("output.mp3"):
            st.audio("output.mp3", format="audio/mpeg", loop=True)
        with st.expander("References:", expanded=True):
            st.markdown(ref_table_string)

    # Update chat history with final response
    final_response: str = f"{response}\n\n{ref_table_string}"
    st.session_state.messages.append({"role": "assistant", "content": final_response})
helper.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import subprocess
5
+ import urllib
6
+ from datetime import datetime
7
+ from typing import Dict, List, Any, Optional
8
+ import requests
9
+ import re
10
+ from bs4 import BeautifulSoup
11
+ from gtts import gTTS
12
+ from logger.app_logger import app_logger
13
+
14
+ # ============================ CHATBOT CLASS ============================
15
+
16
class ChatBot:
    """
    A chatbot that talks to a local Llama model through the `ollama` CLI.

    Every request starts a new `ollama run` process and sends the text on its
    standard input; the process's standard output is taken as the reply.
    """

    # Model tag passed to `ollama run` for every request
    MODEL_NAME: str = "llama3.2:latest"

    def __init__(self) -> None:
        """Initialize the ChatBot instance with a conversation history."""
        self.history: List[Dict[str, str]] = [{"role": "system", "content": "You are a helpful assistant."}]
        app_logger.log_info("ChatBot instance initialized", level="INFO")

    def _run_model(self, model_input: str) -> subprocess.CompletedProcess:
        """Run the model once with `model_input` on stdin and return the finished process."""
        return subprocess.run(
            ["ollama", "run", self.MODEL_NAME],
            input=model_input,
            capture_output=True,
            text=True,
        )

    @staticmethod
    def _parse_rating(raw: str) -> Optional[str]:
        """
        Validate a model reply that is supposed to be a rating.

        Args:
            raw (str): The raw model output.

        Returns:
            Optional[str]: The rating as a string ("4", "3.5", ...) when the reply
            is a number between 1 and 5, otherwise None.
        """
        # Models often wrap the number in backticks or bold markers.
        cleaned = raw.strip().strip("`*").strip()
        try:
            value = float(cleaned)
        except ValueError:
            return None
        if not 1 <= value <= 5:  # also rejects NaN, which fails every comparison
            return None
        return str(int(value)) if value.is_integer() else str(value)

    def generate_response(self, prompt: str) -> str:
        """
        Generate a response from the chatbot based on the user's prompt.

        The prompt is appended to the history and the whole history is sent to
        the model as "role: content" lines.

        Args:
            prompt (str): The input message from the user.

        Returns:
            str: The model's reply, or an apology message if the model could not be run.
        """
        self.history.append({"role": "user", "content": prompt})
        app_logger.log_info("User prompt added to history", level="INFO")

        # Convert chat history into a string for subprocess input
        conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in self.history)

        try:
            completion = self._run_model(conversation)

            if completion.returncode != 0:
                app_logger.log_error(f"Error running subprocess: {completion.stderr}")
                return "I'm sorry, I encountered an issue processing your request."

            response = completion.stdout.strip()
            self.history.append({"role": "assistant", "content": response})
            app_logger.log_info("Assistant response generated", level="INFO")

            return response

        except Exception as e:  # e.g. the `ollama` executable is missing
            app_logger.log_error(f"Error sending query to the model: {e}")
            return "I'm sorry, an error occurred while processing your request."

    async def rate_body_of_article(self, article_title: str, article_content: str) -> str:
        """
        Rate the quality of an article's content based on its title.

        Args:
            article_title (str): The title of the article.
            article_content (str): The full content of the article; only the
                first 1000 characters are sent to the model.

        Returns:
            str: A rating between 1 and 5 (half points allowed) as a string,
            or "Error" when the model failed or returned something else.
        """
        excerpt = article_content[:1000]  # keep the prompt small
        prompt = (
            "Given the following article title and content, provide a rating between 1 and 5\n"
            "based on how well the content aligns with the title and its overall quality.\n"
            "\n"
            f"- **Article Title**: {article_title}\n"
            f"- **Article Content**: {excerpt}\n"
            "\n"
            "**Instructions:**\n"
            "- The rating should be a number between 1 and 5; half points such as 3.5 are allowed.\n"
            "- Base your score on accuracy, clarity, and relevance.\n"
            "- Only return a single numeric value (1-5) with no extra text.\n"
            "\n"
            "**Example Output:**\n"
            "`4` or `2` or `3.5` or `1.5`\n"
        )

        try:
            # The subprocess call blocks, so it runs in a worker thread to let
            # other coroutines make progress while the model is working.
            completion = await asyncio.to_thread(self._run_model, prompt)

            if completion.returncode != 0:
                app_logger.log_error(f"Error running subprocess: {completion.stderr}")
                return "Error"

            response = completion.stdout.strip()
            rating = self._parse_rating(response)

            if rating is None:
                app_logger.log_warning(f"Invalid rating received: {response}")
                return "Error"

            self.history.append({"role": "assistant", "content": rating})
            app_logger.log_info(f"Article rated: {rating}", level="INFO")
            return rating

        except Exception as e:  # e.g. the `ollama` executable is missing
            app_logger.log_error(f"Error sending query to the model: {e}")
            return "Error"
121
+
122
+
123
+ # ============================ EXTRACT NEWS BODY ============================
124
+
125
def extract_news_body(news_url: str) -> str:
    """
    Download a news page and return the text of its paragraphs.

    Args:
        news_url (str): The URL of the news article.

    Returns:
        str: The non-empty `<p>` texts joined by newlines. When the page cannot
        be fetched or parsed, a short error message is returned instead.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
    }

    try:
        page = requests.get(news_url, headers=browser_headers, timeout=5)
        if page.status_code != 200:
            app_logger.log_error(f"Failed to fetch article: {page.status_code}")
            return "Failed to fetch article."

        # Keep every paragraph that still has text after trimming
        kept_paragraphs = []
        for paragraph in BeautifulSoup(page.text, "html.parser").find_all("p"):
            paragraph_text = paragraph.text.strip()
            if paragraph_text:
                kept_paragraphs.append(paragraph_text)

        body = "\n".join(kept_paragraphs)
        app_logger.log_info(f"Article content extracted from {news_url}", level="INFO")
        return body

    except Exception as exc:
        app_logger.log_error(f"Error extracting article content: {exc}")
        return f"Error extracting article content: {exc}"
156
+
157
+
158
+ # ============================ ASYNC NEWS SCRAPING ============================
159
+
160
async def invoke_duckduckgo_news_search(query: str, num: int = 5, location: str = "us-en", time_filter: str = "w") -> \
        Dict[str, Any]:
    """
    Perform a DuckDuckGo News search, extract news headlines, fetch full content,
    and rate articles, processing the individual results concurrently.

    Args:
        query (str): The search query string.
        num (int): Number of search results to retrieve.
        location (str): The region code for location-based results (e.g., 'us-en', 'in-en').
        time_filter (str): Time filter for news ('d' = past day, 'w' = past week, 'm' = past month, 'y' = past year).

    Returns:
        Dict[str, Any]: {"status": "success", "results": [...]} where each result has
        the keys num, link, title, summary, body and rating; or
        {"status": "error", "message": ...} when the search failed or nothing usable was found.
    """
    # Imported here because the module only does `import urllib`, which does
    # not by itself guarantee that the `urllib.parse` submodule is loaded.
    from urllib.parse import unquote

    app_logger.log_info(f"Starting DuckDuckGo news search for query: {query}", level="INFO")

    # Passing the values via `params` lets requests URL-encode them
    # (e.g. '&', '#' or '+' inside the user's query).
    search_params = {"q": query, "kl": location, "df": time_filter, "ia": "news"}
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # requests is blocking, so the call runs in a worker thread.
        response = await asyncio.to_thread(
            requests.get,
            "https://duckduckgo.com/html/",
            params=search_params,
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as e:
        app_logger.log_error(f"Failed to fetch news search results: {e}")
        return {"status": "error", "message": "Failed to fetch news search results"}

    if response.status_code != 200:
        app_logger.log_error(f"Failed to fetch news search results: {response.status_code}")
        return {"status": "error", "message": "Failed to fetch news search results"}

    soup = BeautifulSoup(response.text, "html.parser")
    search_results = soup.find_all("div", class_="result__body")

    async def process_article(result, index: int) -> Optional[Dict[str, Any]]:
        """Processes a single article: extracts details, fetches content, and rates it."""
        try:
            title_tag = result.find("a", class_="result__a")
            if not title_tag:
                app_logger.log_warning(f"Title tag not found for result index {index}")
                return None

            title = title_tag.text.strip()
            raw_link = title_tag.get("href", "")

            # Result links are redirects that carry the target URL, percent-encoded, in `uddg=`.
            match = re.search(r"uddg=(https?%3A%2F%2F[^&]+)", raw_link)
            if match:
                link = unquote(match.group(1))
            elif raw_link.startswith(("http://", "https://")):
                link = raw_link  # already a direct link
            else:
                link = "Unknown Link"

            snippet_tag = result.find("a", class_="result__snippet")
            summary = snippet_tag.text.strip() if snippet_tag else "No summary available."

            # Blocking download: run it in a worker thread so the other
            # articles are fetched at the same time.
            article_content = await asyncio.to_thread(extract_news_body, link)

            bot = ChatBot()
            rating = await bot.rate_body_of_article(title, article_content)

            app_logger.log_info(f"Processed article: {title}", level="INFO")

            return {
                "num": index + 1,
                "link": link,
                "title": title,
                "summary": summary,
                "body": article_content,
                "rating": rating
            }

        except Exception as e:  # one bad result must not sink the whole search
            app_logger.log_error(f"Error processing article: {e}")
            return None

    # A negative `num` would otherwise slice from the end of the list.
    wanted = search_results[:max(num, 0)]
    tasks = [process_article(result, index) for index, result in enumerate(wanted)]
    extracted_results = await asyncio.gather(*tasks)

    extracted_results = [res for res in extracted_results if res is not None]

    if extracted_results:
        app_logger.log_info(f"News search completed successfully with {len(extracted_results)} results", level="INFO")
        return {"status": "success", "results": extracted_results}

    app_logger.log_error("No valid news search results found")
    return {"status": "error", "message": "No valid news search results found"}
236
+
237
+
238
+ # ============================ UTILITY FUNCTIONS ============================
239
+
240
def current_year() -> int:
    """Return the year of the current local date and time as an integer."""
    right_now = datetime.now()
    return right_now.year
243
+
244
+
245
def save_to_audio(text: str) -> None:
    """
    Speak `text` in English with gTTS and write the result to "output.mp3".

    Any failure is logged and swallowed, so the caller never sees an exception.
    """
    try:
        gTTS(text=text, lang="en").save("output.mp3")
        app_logger.log_info("Response converted to audio", level="INFO")
    except Exception as exc:
        app_logger.log_error(f"Error converting response to audio: {exc}")
logger/__init__.py ADDED
File without changes
logger/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (166 Bytes). View file
 
logger/__pycache__/app_logger.cpython-312.pyc ADDED
Binary file (4.73 kB). View file
 
logger/app_logger.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from loguru import logger as loguru_logger
3
+ from typing import Any, Generator
4
+ from contextlib import contextmanager
5
# Path of the log file (relative to the current working directory)
LOG_FILE = "logs/app.log"

# Create the directory that will hold the log file if it is not there yet
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)

# Line layout: timestamp | padded level | file:name:line - message
_LOG_FORMAT = "".join(
    (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | ",
        "<level>{level: <8}</level> | ",
        "<cyan>{file}</cyan>:<cyan>{name}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
    )
)

# Register the file sink: rotation and retention values are passed to Loguru as given
loguru_logger.add(
    LOG_FILE,
    rotation="1 day",
    retention="10 days",
    format=_LOG_FORMAT,
)
20
+
21
+
22
class AppLogger:
    """
    Thin wrapper around the Loguru logger.

    Every method joins its positional arguments with single spaces into one
    message and forwards the remaining keyword arguments to Loguru.
    `opt(depth=1)` is used so the record is attributed to the caller of the
    wrapper method rather than to this class. The `*_async` variants are
    coroutine functions that perform exactly the same call as their
    synchronous counterparts.
    """

    def __init__(self):
        pass

    @staticmethod
    def _as_text(parts) -> str:
        """Join the stringified parts with single spaces."""
        return " ".join(str(part) for part in parts)

    def log_info(self, *args: Any, **kwargs: Any) -> None:
        """Log at the level given by the `level` keyword (default "INFO")."""
        severity = kwargs.pop("level", "INFO")
        loguru_logger.opt(depth=1).log(severity, self._as_text(args), **kwargs)

    async def log_info_async(self, *args: Any, **kwargs: Any) -> None:
        """Coroutine variant of `log_info`."""
        severity = kwargs.pop("level", "INFO")
        loguru_logger.opt(depth=1).log(severity, self._as_text(args), **kwargs)

    def log_error(self, *args: Any, **kwargs: Any) -> None:
        """Log an error message."""
        loguru_logger.opt(depth=1).error(self._as_text(args), **kwargs)

    async def log_error_async(self, *args: Any, **kwargs: Any) -> None:
        """Coroutine variant of `log_error`."""
        loguru_logger.opt(depth=1).error(self._as_text(args), **kwargs)

    def log_debug(self, *args: Any, **kwargs: Any) -> None:
        """Log a debug message."""
        loguru_logger.opt(depth=1).debug(self._as_text(args), **kwargs)

    async def log_debug_async(self, *args: Any, **kwargs: Any) -> None:
        """Coroutine variant of `log_debug`."""
        loguru_logger.opt(depth=1).debug(self._as_text(args), **kwargs)

    def log_warning(self, *args: Any, **kwargs: Any) -> None:
        """Log a warning message."""
        loguru_logger.opt(depth=1).warning(self._as_text(args), **kwargs)

    async def log_warning_async(self, *args: Any, **kwargs: Any) -> None:
        """Coroutine variant of `log_warning`."""
        loguru_logger.opt(depth=1).warning(self._as_text(args), **kwargs)


# Shared logger instance imported by the rest of the application
app_logger = AppLogger()
logs/app.log ADDED
The diff for this file is too large to render. See raw diff
 
output.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:806ab164eee42905afa55bd61329e4ed93074eef9793b844388ad2045b134cef
3
+ size 632064
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ google-search-results
2
+ openai
3
+ streamlit
4
+ serpapi
5
+ gTTS
6
+ bs4
7
+ loguru