AmirTrader committed on
Commit
04e49dd
·
verified ·
1 Parent(s): df79e79

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +360 -0
app.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ from datetime import datetime
6
+ import os
7
+
8
+ from utils import upload_to_hf_dataset, download_from_hf_dataset
9
+
10
+ import dotenv
11
+
12
# Populate the process environment from a local .env file, then read the
# Hugging Face access token from it.
dotenv.load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

# Date range that is encoded in the dataset file name.
startdate = "2020-01-01"
enddate = "2025-07-01"

# Name of the parquet file inside the Hugging Face dataset repository.
# With the dates above this evaluates to
# "marketsession_polygon_2020-01-01_2025-07-01.parquet_with_premarketvolume900K_marketcap1B.parquet".
# NOTE(review): the name contains ".parquet" twice -- presumably that is how
# the file is stored in the repository; confirm before "fixing" it.
file_path = os.path.basename(f"marketsession_polygon_{startdate}_{enddate}.parquet")
file_path = file_path + "_with_premarketvolume900K_marketcap1B.parquet"

# Fetch the file from the dataset repository.
# NOTE(review): assumes the helper saves it under `file_path` relative to the
# working directory, since that is where it is read from below -- confirm.
download_from_hf_dataset(
    file_path=file_path,
    dataset_name="AmirTrader/PennyStocks",
    token=HF_TOKEN,
    repo_type="dataset",
)

# Raw, unprocessed dataset.
df_org = pd.read_parquet(file_path)
30
+
31
+
32
# Columns shown in the results table. These are the names that exist AFTER the
# preprocessing below (renamed market-cap column, derived float column).
displayCols = [
    'Ticker',
    'premarket_volume',
    'marketcap(M$)',
    'SharesFloat(M)',
    'Rotation',
    'datetime',
    'Sector',
    'premarket_change_from_perviousday_perc',
]

# Columns taken from the raw dataset (raw names, before renaming).
preferedCols = [
    'Ticker',
    'premarket_volume',
    'marketcap',
    'Shares Float',
    'Rotation',
    'datetime',
    'Sector',
    'premarket_change_from_perviousday_perc',
    'premarket_change_from_perviousday_high_perc',
    'high_closepermarketperc',
    'low_closepermarketperc',
    'close_closepermarketperc',
    'marketsession_3min_closepermarketperc',
    'marketsession_5min_closepermarketperc',
    'marketsession_10min_closepermarketperc',
    'marketsession_15min_closepermarketperc',
    'marketsession_30min_closepermarketperc',
    'marketsession_60min_closepermarketperc',
    'marketsession_120min_closepermarketperc',
]

# Work on an explicit copy: the column assignments below must modify an
# independent frame, not a selection of df_org (this also avoids pandas'
# SettingWithCopyWarning).
df = df_org[preferedCols].copy()

# 'Shares Float': strip thousands separators, coerce to numeric (unparseable
# values become NaN), then derive the float in millions with 3 decimals.
df['Shares Float'] = df['Shares Float'].replace(',', '', regex=True)
df['Shares Float'] = pd.to_numeric(df['Shares Float'], errors='coerce')
df['SharesFloat(M)'] = (df['Shares Float'] / 1_000_000).round(3)

# Every column whose name contains 'perc': coerce to numeric, divide by 100
# and round to 1 decimal.
perc_columns = [col for col in df.columns if 'perc' in col.lower()]
for col in perc_columns:
    df[col] = (pd.to_numeric(df[col], errors='coerce') / 100).round(1)

# Parse timestamps; unparseable values become NaT.
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')

# Rotation as a number with 2 decimals.
df['Rotation'] = pd.to_numeric(df['Rotation'], errors='coerce').round(2)

# The market-cap column is only renamed here; its values are not converted.
# NOTE(review): presumably the dataset already stores it in millions of USD,
# as the new name suggests -- confirm.
df.rename(columns={'marketcap': 'marketcap(M$)'}, inplace=True)

# Module-level state shared by the filter and pagination callbacks.
current_page = 0      # zero-based index of the page currently displayed
filtered_df = None    # rows matching the active filters (display columns only)
current_query = ""    # most recent query text submitted by the user
81
+
82
def get_total_pages():
    """Return how many 20-row pages ``filtered_df`` spans (never less than 1).

    A missing (``None``) or empty ``filtered_df`` counts as a single page.
    """
    page_size = 20
    row_count = 0 if filtered_df is None else len(filtered_df)
    if row_count == 0:
        return 1
    # Ceiling division without floats.
    return -(-row_count // page_size)
88
+
89
def filter_dataframe(start_dt, end_dt, query_text=""):
    """Filter the dataset by date range and optional query; return page one.

    Resets ``current_page`` to 0, remembers ``query_text`` in
    ``current_query`` and stores the matching rows (display columns only) in
    ``filtered_df``.

    Args:
        start_dt: Inclusive lower bound for the ``datetime`` column; a string
            is parsed with ``pd.to_datetime``, anything else is used as is.
        end_dt: Inclusive upper bound, handled like ``start_dt``.
        query_text: Optional ``DataFrame.query`` expression applied after the
            date filter.

    Returns:
        A ``(page, page_info, query_status)`` tuple: the first page of results
        as a DataFrame, the pagination label, and a status message for the
        query (empty string when no query was given).

    Notes:
        * The date filter is applied only when both bounds are provided.
        * A failing query does not abort the request: the date-filtered rows
          are shown and the error is reported in ``query_status``.
        * On an invalid date range or any unexpected error, ``filtered_df`` is
          cleared so that Previous/Next cannot page through results left over
          from an earlier, unrelated filter.
    """
    global filtered_df, current_page, current_query
    current_page = 0  # every new filter starts on the first page
    current_query = query_text

    try:
        working_df = df.copy()

        if start_dt and end_dt:
            start = pd.to_datetime(start_dt) if isinstance(start_dt, str) else start_dt
            end = pd.to_datetime(end_dt) if isinstance(end_dt, str) else end_dt

            if start > end:
                filtered_df = pd.DataFrame()
                return (
                    pd.DataFrame({"Error": ["Start date must be before end date"]}),
                    "Page 1 of 1",
                    "",
                )

            in_range = (working_df['datetime'] >= start) & (working_df['datetime'] <= end)
            working_df = working_df.loc[in_range]

        query_status = ""
        expression = query_text.strip() if query_text else ""
        if expression:
            # NOTE(security): `expression` comes straight from the UI and is
            # evaluated by DataFrame.query. pandas documents that evaluating
            # user-supplied expressions can run arbitrary code; restrict or
            # validate the input if this app is exposed to untrusted users.
            try:
                working_df = working_df.query(expression)
                query_status = f"✅ Query executed successfully. Found {len(working_df)} rows."
            except Exception as query_error:
                # Keep the date-filtered rows and surface the problem instead.
                query_status = f"❌ Query error: {str(query_error)}"

        filtered_df = working_df[displayCols].copy() if not working_df.empty else pd.DataFrame()

        return paginate_data(), get_page_info(), query_status

    except Exception as e:
        filtered_df = pd.DataFrame()
        return (
            pd.DataFrame({"Error": [f"Error processing request: {str(e)}"]}),
            "Error",
            f"❌ Error: {str(e)}",
        )
139
+
140
def execute_query_only(query_text):
    """Run a query against the full dataset and return the first result page.

    Resets ``current_page`` to 0 and remembers ``query_text`` in
    ``current_query``.

    NOTE: the query always runs on the complete dataset. A date range applied
    earlier through the date filter is NOT re-applied, because that range is
    not stored anywhere this function can read it.

    Args:
        query_text: ``DataFrame.query`` expression; empty or blank text
            selects every row.

    Returns:
        A ``(page, page_info, query_status)`` tuple. When the query fails, or
        an unexpected error occurs, ``filtered_df`` is left untouched and the
        first page of the previously filtered data is returned together with
        the error message.
    """
    global filtered_df, current_page, current_query
    current_page = 0  # every new query starts on the first page
    current_query = query_text

    try:
        working_df = df.copy()

        query_status = ""
        expression = query_text.strip() if query_text else ""
        if expression:
            # NOTE(security): `expression` comes straight from the UI and is
            # evaluated by DataFrame.query. pandas documents that evaluating
            # user-supplied expressions can run arbitrary code; restrict or
            # validate the input if this app is exposed to untrusted users.
            try:
                working_df = working_df.query(expression)
                query_status = f"✅ Query executed successfully. Found {len(working_df)} rows."
            except Exception as query_error:
                # Leave the current results in place and report the problem.
                return paginate_data(), get_page_info(), f"❌ Query error: {str(query_error)}"

        filtered_df = working_df[displayCols].copy() if not working_df.empty else pd.DataFrame()

        return paginate_data(), get_page_info(), query_status

    except Exception as e:
        return paginate_data(), get_page_info(), f"❌ Error: {str(e)}"
176
+
177
def paginate_data():
    """Return the rows of ``filtered_df`` belonging to the current page.

    ``current_page`` is clamped into the valid page range first. A missing or
    empty ``filtered_df`` yields an empty DataFrame (and no clamping).
    The returned page has a fresh 0-based index.
    """
    global filtered_df, current_page
    if filtered_df is None or len(filtered_df) == 0:
        return pd.DataFrame()

    page_size = 20
    last_page = get_total_pages() - 1

    # Clamp the page index into [0, last_page].
    if current_page > last_page:
        current_page = last_page
    if current_page < 0:
        current_page = 0

    first_row = current_page * page_size
    page_rows = filtered_df.iloc[first_row:first_row + page_size]
    return page_rows.reset_index(drop=True)
192
+
193
def get_page_info():
    """Build the pagination label, e.g. ``Page 1 of 3 (Total rows: 45)``.

    The page number shown is one-based; the row count is 0 while no filter
    result exists yet.
    """
    if filtered_df is None:
        total_rows = 0
    else:
        total_rows = len(filtered_df)
    total_pages = get_total_pages()
    page_number = current_page + 1
    return f"Page {page_number} of {total_pages} (Total rows: {total_rows})"
198
+
199
def go_previous():
    """Step back one page (unless already on the first) and return it.

    Returns:
        A ``(page, page_info)`` tuple for the page now selected.
    """
    global current_page
    has_previous = current_page > 0
    if has_previous:
        current_page = current_page - 1
    page_df = paginate_data()
    return page_df, get_page_info()
204
+
205
def go_next():
    """Advance one page (unless already on the last) and return it.

    Returns:
        A ``(page, page_info)`` tuple for the page now selected.
    """
    global current_page
    last_page = get_total_pages() - 1
    on_last_page = current_page >= last_page
    if not on_last_page:
        current_page = current_page + 1
    page_df = paginate_data()
    return page_df, get_page_info()
211
+
212
def reset_filters():
    """Reset paging and query state and return the default input values.

    Returns:
        ``(startdate, enddate, "")`` -- the default start date, the default
        end date and an empty query string.
    """
    global current_page, current_query
    current_page, current_query = 0, ""
    return startdate, enddate, ""
217
+
218
def get_column_info():
    """Return help text listing the display columns and example queries.

    Each entry of ``displayCols`` is listed with the dtype it has in ``df``,
    followed by a fixed set of example query expressions. Every line,
    including the last, ends with a newline.
    """
    lines = ["Available columns for querying:"]
    lines.extend(f"• `{col}` ({df[col].dtype!s})" for col in displayCols)
    lines.extend([
        "",
        "Example queries:",
        "• `premarket_volume > 100000`",
        "• `Sector == 'Technology'`",
        "• `Rotation > 1.5 and premarket_volume > 50000`",
        "• `Ticker.str.contains('AA', na=False)`",
    ])
    # Build the text in one pass instead of repeated string concatenation.
    return "\n".join(lines) + "\n"
232
+
233
# User interface: date inputs, query box, results table and pagination.
# NOTE(review): the callbacks keep their state in module-level variables
# (current_page, filtered_df, current_query); presumably that state is shared
# by every visitor of the app -- confirm, and consider gr.State if per-user
# isolation is needed.
with gr.Blocks(css="""
.dataframe table {
    font-size: 10px !important;
}
.dataframe th, .dataframe td {
    padding: 4px 8px !important;
    font-size: 10px !important;
}
.dataframe thead th {
    font-size: 10px !important;
    font-weight: bold !important;
}
.query-info {
    font-family: monospace;
    font-size: 12px;
    background-color: #f8f9fa;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
""") as demo:
    gr.Markdown("## 🧪 Micro Cap Lab!")

    with gr.Row():
        # Use Textbox instead of DateTime for better compatibility
        start_picker = gr.Textbox(
            label="Start Date (YYYY-MM-DD)",
            value=startdate,
            placeholder=startdate
        )
        end_picker = gr.Textbox(
            label="End Date (YYYY-MM-DD)",
            value=enddate,
            placeholder=enddate
        )

    # Query section
    with gr.Row():
        with gr.Column(scale=4):
            query_input = gr.Textbox(
                label="DataFrame Query",
                placeholder="e.g., premarket_volume > 100000",
                lines=2,
                info="Enter pandas query expression (use backticks for column names with spaces)"
            )
        with gr.Column(scale=1):
            query_btn = gr.Button("Execute Query", variant="primary")

    query_status = gr.Textbox(
        label="Query Status",
        interactive=False,
        visible=True
    )

    # Column information (collapsible)
    with gr.Accordion("📋 Column Information & Query Examples", open=False):
        column_info = gr.Textbox(
            value=get_column_info(),
            label="",
            interactive=False,
            lines=15,
            elem_classes=["query-info"]
        )

    output = gr.Dataframe(
        label="Filtered Table",
        interactive=False
    )

    # Pagination controls
    with gr.Row():
        prev_btn = gr.Button("← Previous", variant="secondary")
        page_info = gr.Textbox(
            value="Page 1 of 1",
            interactive=False,
            show_label=False,
            container=False
        )
        next_btn = gr.Button("Next →", variant="secondary")

    with gr.Row():
        apply_btn = gr.Button("Apply Date Filter", variant="primary")
        reset_btn = gr.Button("Reset All", variant="secondary")

    # Event handlers
    # "Apply Date Filter" applies the date range together with the current query.
    apply_btn.click(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )

    # "Execute Query" passes only the query text to its handler.
    query_btn.click(
        fn=execute_query_only,
        inputs=[query_input],
        outputs=[output, page_info, query_status]
    )

    prev_btn.click(
        fn=go_previous,
        inputs=[],
        outputs=[output, page_info]
    )

    next_btn.click(
        fn=go_next,
        inputs=[],
        outputs=[output, page_info]
    )

    # "Reset All" first restores the default inputs, then re-runs the filter
    # with those restored values so the table reflects them.
    reset_btn.click(
        fn=reset_filters,
        inputs=[],
        outputs=[start_picker, end_picker, query_input]
    ).then(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )

    # Load initial data
    demo.load(
        fn=filter_dataframe,
        inputs=[start_picker, end_picker, query_input],
        outputs=[output, page_info, query_status]
    )

if __name__ == "__main__":
    demo.launch()