Spaces:
Running
Running
Upload analysis.py
Browse files - src/services/analysis.py +21 -8
src/services/analysis.py
CHANGED
|
@@ -27,7 +27,7 @@ ORIGINAL_HTML_STYLE = """
|
|
| 27 |
|
| 28 |
ORIGINAL_MATCHED_HEADERS = ["Ticker", "Spot MrktCap", "Spot Volume", "Spot VTMR", "Futures Volume", "Futures VTMR", "OISS", "Funding Rate"]
|
| 29 |
ORIGINAL_FUTURES_HEADERS = ["Ticker", "Market Cap", "Volume", "VTMR", "OISS", "Funding Rate"]
|
| 30 |
-
ORIGINAL_SPOT_HEADERS = ["Ticker", "
|
| 31 |
|
| 32 |
class FileScanner:
|
| 33 |
"""Locates the latest Spot and Futures data files in the USER directory."""
|
|
@@ -79,10 +79,11 @@ class DataProcessor:
|
|
| 79 |
def load_spot(path: Path) -> pd.DataFrame:
|
| 80 |
print(f" Parsing Spot File: {path.name}")
|
| 81 |
try:
|
|
|
|
| 82 |
if path.suffix == '.html':
|
| 83 |
-
df = pd.read_html(path)[0]
|
| 84 |
else:
|
| 85 |
-
df = pd.read_csv(path)
|
| 86 |
df.columns = [c.lower().replace(' ', '_') for c in df.columns]
|
| 87 |
|
| 88 |
col_map = {
|
|
@@ -106,7 +107,8 @@ class DataProcessor:
|
|
| 106 |
break
|
| 107 |
|
| 108 |
if 'ticker' in df.columns:
|
| 109 |
-
|
|
|
|
| 110 |
print(f" Extracted {len(df)} spot tokens")
|
| 111 |
return df
|
| 112 |
except Exception as e:
|
|
@@ -123,6 +125,7 @@ class DataProcessor:
|
|
| 123 |
df_display[m] = ""
|
| 124 |
df_display = df_display[df_cols]
|
| 125 |
df_display.columns = headers
|
|
|
|
| 126 |
table_html = df_display.to_html(index=False, classes='table', escape=False)
|
| 127 |
return f'<div class="table-container"><h2>{title}</h2>{table_html}</div>'
|
| 128 |
|
|
@@ -144,7 +147,7 @@ class DataProcessor:
|
|
| 144 |
print(f" Futures high-quality filtering error: {e}")
|
| 145 |
valid_futures['vtmr_display'] = valid_futures['vtmr']
|
| 146 |
|
| 147 |
-
#
|
| 148 |
merged = pd.merge(spot_df, valid_futures, on='ticker', how='inner', suffixes=('_spot', '_fut'))
|
| 149 |
if 'vtmr_fut' in merged.columns:
|
| 150 |
merged = merged.sort_values('vtmr_fut', ascending=False)
|
|
@@ -254,13 +257,23 @@ def crypto_analysis_v4(user_keys, user_id) -> None:
|
|
| 254 |
|
| 255 |
# 2. Parse Files
|
| 256 |
futures_df = PDFParser.extract(futures_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
spot_df = DataProcessor.load_spot(spot_file)
|
| 258 |
|
| 259 |
-
# 3. Generate HTML
|
| 260 |
html_content = DataProcessor.generate_html_report(futures_df, spot_df)
|
| 261 |
-
|
| 262 |
if html_content:
|
| 263 |
-
#
|
| 264 |
pdf_path = convert_html_to_pdf(html_content, user_id)
|
| 265 |
|
| 266 |
print(" 🧹 Cleaning up source files after analysis...")
|
|
|
|
| 27 |
|
| 28 |
ORIGINAL_MATCHED_HEADERS = ["Ticker", "Spot MrktCap", "Spot Volume", "Spot VTMR", "Futures Volume", "Futures VTMR", "OISS", "Funding Rate"]
|
| 29 |
ORIGINAL_FUTURES_HEADERS = ["Ticker", "Market Cap", "Volume", "VTMR", "OISS", "Funding Rate"]
|
| 30 |
+
ORIGINAL_SPOT_HEADERS = ["Ticker", "MarketCap", "Volume", "VTMR"]
|
| 31 |
|
| 32 |
class FileScanner:
|
| 33 |
"""Locates the latest Spot and Futures data files in the USER directory."""
|
|
|
|
| 79 |
def load_spot(path: Path) -> pd.DataFrame:
|
| 80 |
print(f" Parsing Spot File: {path.name}")
|
| 81 |
try:
|
| 82 |
+
# Explicit UTF-8 for Unicode preservation
|
| 83 |
if path.suffix == '.html':
|
| 84 |
+
df = pd.read_html(str(path), encoding='utf-8')[0]
|
| 85 |
else:
|
| 86 |
+
df = pd.read_csv(path, encoding='utf-8')
|
| 87 |
df.columns = [c.lower().replace(' ', '_') for c in df.columns]
|
| 88 |
|
| 89 |
col_map = {
|
|
|
|
| 107 |
break
|
| 108 |
|
| 109 |
if 'ticker' in df.columns:
|
| 110 |
+
# Unicode-safe cleaning to protect Chinese characters
|
| 111 |
+
df['ticker'] = df['ticker'].apply(lambda x: str(x).strip().upper())
|
| 112 |
print(f" Extracted {len(df)} spot tokens")
|
| 113 |
return df
|
| 114 |
except Exception as e:
|
|
|
|
| 125 |
df_display[m] = ""
|
| 126 |
df_display = df_display[df_cols]
|
| 127 |
df_display.columns = headers
|
| 128 |
+
# escape=False is critical for rendering ticker links
|
| 129 |
table_html = df_display.to_html(index=False, classes='table', escape=False)
|
| 130 |
return f'<div class="table-container"><h2>{title}</h2>{table_html}</div>'
|
| 131 |
|
|
|
|
| 147 |
print(f" Futures high-quality filtering error: {e}")
|
| 148 |
valid_futures['vtmr_display'] = valid_futures['vtmr']
|
| 149 |
|
| 150 |
+
# Suffix-based merge to prevent blank column mapping issues
|
| 151 |
merged = pd.merge(spot_df, valid_futures, on='ticker', how='inner', suffixes=('_spot', '_fut'))
|
| 152 |
if 'vtmr_fut' in merged.columns:
|
| 153 |
merged = merged.sort_values('vtmr_fut', ascending=False)
|
|
|
|
| 257 |
|
| 258 |
# 2. Parse Files
|
| 259 |
futures_df = PDFParser.extract(futures_file)
|
| 260 |
+
|
| 261 |
+
# FIX: "First Uppercase Ticker" logic to prevent labeling shift
|
| 262 |
+
if not futures_df.empty and 'ticker' in futures_df.columns:
|
| 263 |
+
def refine_ticker(x):
|
| 264 |
+
text = str(x)
|
| 265 |
+
# Find all words that are entirely uppercase (2+ chars)
|
| 266 |
+
matches = re.findall(r'\b[A-Z]{2,}\b', text)
|
| 267 |
+
# Use the first match; fallback to original stripped value if none (e.g. Chinese)
|
| 268 |
+
return matches[0] if matches else text.strip().upper()
|
| 269 |
+
|
| 270 |
+
futures_df['ticker'] = futures_df['ticker'].apply(refine_ticker)
|
| 271 |
+
|
| 272 |
spot_df = DataProcessor.load_spot(spot_file)
|
| 273 |
|
|
|
|
| 274 |
html_content = DataProcessor.generate_html_report(futures_df, spot_df)
|
|
|
|
| 275 |
if html_content:
|
| 276 |
+
# Create PDF
|
| 277 |
pdf_path = convert_html_to_pdf(html_content, user_id)
|
| 278 |
|
| 279 |
print(" 🧹 Cleaning up source files after analysis...")
|