Spaces:
Sleeping
Sleeping
File size: 11,963 Bytes
04e49dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
import gradio as gr
import pandas as pd
import numpy as np
from datetime import datetime
import os
from utils import upload_to_hf_dataset, download_from_hf_dataset
import dotenv
# Load environment variables from a local .env file (provides HF_TOKEN).
dotenv.load_dotenv()
# Token used to access the HuggingFace dataset repo.
HF_TOKEN = os.getenv("HF_TOKEN")

# Date range encoded in the parquet file name on HuggingFace.
startdate = "2020-01-01"
enddate = "2025-07-01"

# Filename of the parquet file on HuggingFace.
# NOTE(review): the basename still carries ".parquet", so the final name is
# "...<dates>.parquet_with_premarketvolume900K_marketcap1B.parquet"; this must
# match the file actually stored in the dataset repo - confirm before changing.
file_path = f"marketsession_polygon_{startdate}_{enddate}.parquet"
file_path = f"{os.path.basename(file_path)}_with_premarketvolume900K_marketcap1B.parquet"

# Download the parquet file from HuggingFace, then load it into a DataFrame.
download_from_hf_dataset(file_path=file_path, dataset_name="AmirTrader/PennyStocks", token=HF_TOKEN, repo_type="dataset")
df_org = pd.read_parquet(file_path)

# Columns rendered in the UI table.
displayCols = ['Ticker', 'premarket_volume', 'marketcap(M$)', 'SharesFloat(M)', 'Rotation', 'datetime', 'Sector', 'premarket_change_from_perviousday_perc']
# Raw dataset columns kept for processing ('perviousday' is the dataset's own spelling).
preferedCols = ['Ticker', 'premarket_volume', 'marketcap', 'Shares Float', 'Rotation', 'datetime',
                'Sector',
                'premarket_change_from_perviousday_perc',
                'premarket_change_from_perviousday_high_perc',
                'high_closepermarketperc', 'low_closepermarketperc',
                'close_closepermarketperc', 'marketsession_3min_closepermarketperc',
                'marketsession_5min_closepermarketperc',
                'marketsession_10min_closepermarketperc',
                'marketsession_15min_closepermarketperc',
                'marketsession_30min_closepermarketperc',
                'marketsession_60min_closepermarketperc',
                'marketsession_120min_closepermarketperc'
                ]

# Take an explicit copy so the in-place cleanup below does not trigger pandas
# SettingWithCopyWarning (or silently not write under copy-on-write semantics).
df = df_org[preferedCols].copy()

# --- Shares Float: strip commas, coerce to numeric, express in millions ---
df['Shares Float'] = df['Shares Float'].replace(',', '', regex=True)
df['Shares Float'] = pd.to_numeric(df['Shares Float'], errors='coerce')
df['SharesFloat(M)'] = (df['Shares Float'] / 1_000_000).round(3)

# --- Percentage columns: coerce to numeric, convert percent -> fraction ---
perc_columns = [col for col in df.columns if 'perc' in col.lower()]
for col in perc_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')  # ensure numeric
    df[col] = (df[col] / 100).round(1)

# Parse timestamps (invalid values become NaT).
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
# Round Rotation to 2 decimals.
df['Rotation'] = pd.to_numeric(df['Rotation'], errors='coerce').round(2)
# Rename 'marketcap' so the unit is explicit in the UI.
df.rename(columns={'marketcap': 'marketcap(M$)'}, inplace=True)

# Global UI state shared by the filter/pagination callbacks below.
current_page = 0
filtered_df = None
current_query = ""
def get_total_pages():
    """Return how many 20-row pages the current `filtered_df` spans (minimum 1)."""
    global filtered_df
    rows = 0 if filtered_df is None else len(filtered_df)
    if rows == 0:
        return 1
    # Ceiling division: 20 rows per page.
    return -(-rows // 20)
def filter_dataframe(start_dt, end_dt, query_text=""):
    """Filter the global dataset by date range and an optional pandas query.

    Args:
        start_dt, end_dt: date bounds as 'YYYY-MM-DD' strings (or datetime-like).
        query_text: optional `DataFrame.query` expression; applied after the
            date filter.

    Returns:
        Tuple of (current page DataFrame, page-info string, query-status string).
    """
    global filtered_df, current_page, current_query
    current_page = 0  # reset to first page whenever the filter changes
    current_query = query_text
    try:
        working_df = df.copy()

        # Apply the date filter only when both bounds are provided.
        if start_dt and end_dt:
            start = pd.to_datetime(start_dt) if isinstance(start_dt, str) else start_dt
            end = pd.to_datetime(end_dt) if isinstance(end_dt, str) else end_dt
            if start > end:
                return pd.DataFrame({"Error": ["Start date must be before end date"]}), "Page 1 of 1", ""
            mask = (working_df['datetime'] >= start) & (working_df['datetime'] <= end)
            working_df = working_df.loc[mask]

        # Apply the optional query. On failure we keep the date-filtered data
        # and surface the error in the status string instead of raising.
        query_status = ""
        if query_text and query_text.strip():
            try:
                working_df = working_df.query(query_text.strip())
                query_status = f"✅ Query executed successfully. Found {len(working_df)} rows."
            except Exception as query_error:
                query_status = f"❌ Query error: {str(query_error)}"

        # Keep only the columns shown in the UI table.
        filtered_df = working_df[displayCols].copy() if not working_df.empty else pd.DataFrame()
        return paginate_data(), get_page_info(), query_status
    except Exception as e:
        return pd.DataFrame({"Error": [f"Error processing request: {str(e)}"]}), "Error", f"❌ Error: {str(e)}"
def execute_query_only(query_text):
    """Execute a pandas query without changing the date-filter widgets.

    NOTE(review): the date-filter state is not stored anywhere, so the query
    always runs against the FULL dataset (the original code had an if/else
    whose branches were identical) - confirm this is the intended behavior.

    Args:
        query_text: a `DataFrame.query` expression; blank means "no filter".

    Returns:
        Tuple of (current page DataFrame, page-info string, query-status string).
    """
    global filtered_df, current_page, current_query
    current_page = 0  # reset to first page when querying
    current_query = query_text
    try:
        working_df = df.copy()

        query_status = ""
        if query_text and query_text.strip():
            try:
                working_df = working_df.query(query_text.strip())
                query_status = f"✅ Query executed successfully. Found {len(working_df)} rows."
            except Exception as query_error:
                # On a bad query, keep showing the current data unchanged.
                return paginate_data(), get_page_info(), f"❌ Query error: {str(query_error)}"

        # Keep only the columns shown in the UI table.
        filtered_df = working_df[displayCols].copy() if not working_df.empty else pd.DataFrame()
        return paginate_data(), get_page_info(), query_status
    except Exception as e:
        return paginate_data(), get_page_info(), f"❌ Error: {str(e)}"
def paginate_data():
    """Return the 20-row slice of `filtered_df` for `current_page` (clamped)."""
    global filtered_df, current_page
    if filtered_df is None or len(filtered_df) == 0:
        return pd.DataFrame()
    size = 20
    # Clamp the page index into [0, last_page] before slicing.
    last_page = get_total_pages() - 1
    current_page = min(max(current_page, 0), last_page)
    lo = current_page * size
    return filtered_df.iloc[lo:lo + size].reset_index(drop=True)
def get_page_info():
    """Build the human-readable pagination summary shown next to the table."""
    global current_page
    pages = get_total_pages()
    rows = 0 if filtered_df is None else len(filtered_df)
    return f"Page {current_page + 1} of {pages} (Total rows: {rows})"
def go_previous():
    """Step one page back (no-op on the first page) and re-render the table."""
    global current_page
    current_page = current_page - 1 if current_page > 0 else current_page
    return paginate_data(), get_page_info()
def go_next():
    """Advance one page (no-op on the last page) and re-render the table."""
    global current_page
    last_page = get_total_pages() - 1
    if current_page < last_page:
        current_page += 1
    return paginate_data(), get_page_info()
def reset_filters():
    """Reset pagination and query state; return default values for the widgets."""
    global current_page, current_query
    current_page, current_query = 0, ""
    return startdate, enddate, ""
def get_column_info():
    """Return help text listing queryable columns (with dtypes) and examples.

    Shown in the collapsible accordion under the query box. The bullet
    characters were mojibake ("β’") in the source and are restored to "•".
    """
    info = "Available columns for querying:\n"
    for col in displayCols:
        dtype = str(df[col].dtype)
        info += f"• `{col}` ({dtype})\n"
    info += "\nExample queries:\n"
    info += "• `premarket_volume > 100000`\n"
    info += "• `Sector == 'Technology'`\n"
    info += "• `Rotation > 1.5 and premarket_volume > 50000`\n"
    info += "• `Ticker.str.contains('AA', na=False)`\n"
    return info
# --- Gradio UI -------------------------------------------------------------
# Component creation order inside `with` contexts determines the page layout.
# NOTE(review): emoji in the labels below were mojibake ("π§ͺ", "β", ...) in
# the source; restored to plausible originals - confirm against the live Space.
with gr.Blocks(css="""
.dataframe table {
    font-size: 10px !important;
}
.dataframe th, .dataframe td {
    padding: 4px 8px !important;
    font-size: 10px !important;
}
.dataframe thead th {
    font-size: 10px !important;
    font-weight: bold !important;
}
.query-info {
    font-family: monospace;
    font-size: 12px;
    background-color: #f8f9fa;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
""") as demo:
    gr.Markdown("## 🧪 Micro Cap Lab!")

    with gr.Row():
        # Textboxes instead of DateTime components for better compatibility.
        start_picker = gr.Textbox(
            label="Start Date (YYYY-MM-DD)",
            value=startdate,
            placeholder=startdate
        )
        end_picker = gr.Textbox(
            label="End Date (YYYY-MM-DD)",
            value=enddate,
            placeholder=enddate
        )

    # Query section: free-form pandas query plus a status readout.
    with gr.Row():
        with gr.Column(scale=4):
            query_input = gr.Textbox(
                label="DataFrame Query",
                placeholder="e.g., premarket_volume > 100000",
                lines=2,
                info="Enter pandas query expression (use backticks for column names with spaces)"
            )
        with gr.Column(scale=1):
            query_btn = gr.Button("Execute Query", variant="primary")
            query_status = gr.Textbox(
                label="Query Status",
                interactive=False,
                visible=True
            )

    # Column information (collapsible help).
    with gr.Accordion("📋 Column Information & Query Examples", open=False):
        column_info = gr.Textbox(
            value=get_column_info(),
            label="",
            interactive=False,
            lines=15,
            elem_classes=["query-info"]
        )

    output = gr.Dataframe(
        label="Filtered Table",
        interactive=False
    )

    # Pagination controls.
    with gr.Row():
        prev_btn = gr.Button("⬅ Previous", variant="secondary")
        page_info = gr.Textbox(
            value="Page 1 of 1",
            interactive=False,
            show_label=False,
            container=False
        )
        next_btn = gr.Button("Next ➡", variant="secondary")

    with gr.Row():
        apply_btn = gr.Button("Apply Date Filter", variant="primary")
        reset_btn = gr.Button("Reset All", variant="secondary")

    # Event wiring: each callback returns the components listed in `outputs`.
    apply_btn.click(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )
    query_btn.click(
        fn=execute_query_only,
        inputs=[query_input],
        outputs=[output, page_info, query_status]
    )
    prev_btn.click(
        fn=go_previous,
        inputs=[],
        outputs=[output, page_info]
    )
    next_btn.click(
        fn=go_next,
        inputs=[],
        outputs=[output, page_info]
    )
    # Reset the widgets first, then re-run the filter with the reset values.
    reset_btn.click(
        fn=reset_filters,
        inputs=[],
        outputs=[start_picker, end_picker, query_input]
    ).then(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )

    # Populate the table once on page load.
    demo.load(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (stray trailing "|"
    # scrape artifact removed from the original line).
    demo.launch()