clean app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Tuple
|
| 5 |
import pandas as pd
|
|
@@ -15,64 +27,29 @@ USE_DOTENV = False
|
|
| 15 |
ROOT = Path(__file__).parent
|
| 16 |
|
| 17 |
JSON_PATH = ROOT / "json"
|
| 18 |
-
|
|
|
|
| 19 |
DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
|
|
|
|
| 20 |
# DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"
|
| 21 |
|
| 22 |
from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
|
| 23 |
tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
#### CONEXIÓN DUCKDB A HUGGING FACE HUB ####
|
| 30 |
print("Initializing DuckDB connection...")
|
| 31 |
con = duckdb.connect()
|
| 32 |
-
hf_token = tokens.get("HF_TOKEN")
|
| 33 |
-
##################################
|
| 34 |
-
masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
|
| 35 |
-
print(f"Using Hugging Face token: {masked_hf_token}")
|
| 36 |
-
##################################
|
| 37 |
-
|
| 38 |
-
hf_token = tokens.get("HF_TOKEN")
|
| 39 |
-
masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
|
| 40 |
-
'''
|
| 41 |
-
create_secret_query = f"""
|
| 42 |
-
INSTALL httpfs;
|
| 43 |
-
LOAD httpfs;
|
| 44 |
-
CREATE PERSISTENT SECRET hf_token (
|
| 45 |
-
TYPE huggingface,
|
| 46 |
-
TOKEN '{hf_token}'
|
| 47 |
-
);
|
| 48 |
-
"""
|
| 49 |
-
'''
|
| 50 |
-
# con.sql(create_secret_query)
|
| 51 |
-
# print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
| 52 |
-
dataset_name = "reddgr/swift-stock-screener"
|
| 53 |
-
# con.sql(query="INSTALL vss; LOAD vss;")
|
| 54 |
-
|
| 55 |
-
create_secret_query = f"""
|
| 56 |
-
INSTALL httpfs;
|
| 57 |
-
LOAD httpfs;
|
| 58 |
-
CREATE PERSISTENT SECRET hf_token (
|
| 59 |
-
TYPE huggingface,
|
| 60 |
-
TOKEN '{hf_token}'
|
| 61 |
-
);
|
| 62 |
-
"""
|
| 63 |
-
con.sql(create_secret_query)
|
| 64 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-2])
|
| 65 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-1])
|
| 66 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
| 67 |
|
| 68 |
-
# FROM 'hf://datasets/reddgr/swift-stock-screener/data/train-00000-of-00001.parquet';
|
| 69 |
create_table_query = f"""
|
| 70 |
INSTALL vss;
|
| 71 |
LOAD vss;
|
| 72 |
SET hnsw_enable_experimental_persistence = true;
|
| 73 |
CREATE TABLE vector_table AS
|
| 74 |
SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
|
| 75 |
-
FROM '
|
| 76 |
"""
|
| 77 |
|
| 78 |
con.sql(create_table_query)
|
|
@@ -83,28 +60,19 @@ create_index_query = f"""
|
|
| 83 |
"""
|
| 84 |
con.sql(create_index_query)
|
| 85 |
|
| 86 |
-
# print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
| 87 |
-
print(f"Created search index. {time.time() - start_time:.2f} seconds.")
|
| 88 |
-
########################################
|
| 89 |
-
|
| 90 |
# ESTADO GLOBAL
|
| 91 |
last_result_df: pd.DataFrame = pd.DataFrame()
|
| 92 |
-
|
| 93 |
-
######################
|
| 94 |
last_search_type: str = ""
|
| 95 |
last_search_query: str = ""
|
| 96 |
-
# last_filtros_values: Tuple = ()
|
| 97 |
last_column_filters: list[tuple[str, str]] = []
|
| 98 |
last_sort_col_label: str = ""
|
| 99 |
last_sort_dir: str = ""
|
| 100 |
-
#######################
|
| 101 |
|
| 102 |
# ---------------------------------------------------------------------------
|
| 103 |
# CONFIG --------------------------------------------------------------------
|
| 104 |
# ---------------------------------------------------------------------------
|
| 105 |
-
app_dataset = load_dataset(
|
| 106 |
|
| 107 |
-
# dh_app = fdh.FrontDatasetHandler(app_dataset=pd.read_pickle(DATASET_PATH))
|
| 108 |
dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
|
| 109 |
maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
|
| 110 |
maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
Swift Stock Screener (SSS)
|
| 3 |
+
Copyright 2025 David González Romero
|
| 4 |
+
|
| 5 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
you may not use this file except in compliance with the License.
|
| 7 |
+
You may obtain a copy of the License at
|
| 8 |
+
|
| 9 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
|
| 11 |
+
App URL: https://huggingface.co/spaces/reddgr/sss
|
| 12 |
+
'''
|
| 13 |
+
|
| 14 |
+
# cd C:\Users\david\Documents\git\miax-tfm-dgr; python app.py
|
| 15 |
from pathlib import Path
|
| 16 |
from typing import Tuple
|
| 17 |
import pandas as pd
|
|
|
|
| 27 |
ROOT = Path(__file__).parent
|
| 28 |
|
| 29 |
JSON_PATH = ROOT / "json"
|
| 30 |
+
DATASET_PATH = "reddgr/swift-stock-screener" # Hugging Face hub dataset name
|
| 31 |
+
EMB_MODEL_PATH = "FinLang/finance-embeddings-investopedia" # Hugging Face Hub embeddings model name
|
| 32 |
DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
|
| 33 |
+
PARQUET_PATH = ROOT / "parquet" / "app_dataset.parquet"
|
| 34 |
# DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"
|
| 35 |
|
| 36 |
from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
|
| 37 |
tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
|
| 38 |
+
|
| 39 |
+
emb_model = SentenceTransformer(EMB_MODEL_PATH, token = tokens.get("HF_TOKEN"))
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
#### CONEXIÓN DE DUCKDB CON EL DATASET PARA INDEXAR ####
|
|
|
|
| 43 |
print("Initializing DuckDB connection...")
|
| 44 |
con = duckdb.connect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
| 46 |
create_table_query = f"""
|
| 47 |
INSTALL vss;
|
| 48 |
LOAD vss;
|
| 49 |
SET hnsw_enable_experimental_persistence = true;
|
| 50 |
CREATE TABLE vector_table AS
|
| 51 |
SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
|
| 52 |
+
FROM '{PARQUET_PATH}';
|
| 53 |
"""
|
| 54 |
|
| 55 |
con.sql(create_table_query)
|
|
|
|
| 60 |
"""
|
| 61 |
con.sql(create_index_query)
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# ESTADO GLOBAL
|
| 64 |
last_result_df: pd.DataFrame = pd.DataFrame()
|
|
|
|
|
|
|
| 65 |
last_search_type: str = ""
|
| 66 |
last_search_query: str = ""
|
|
|
|
| 67 |
last_column_filters: list[tuple[str, str]] = []
|
| 68 |
last_sort_col_label: str = ""
|
| 69 |
last_sort_dir: str = ""
|
|
|
|
| 70 |
|
| 71 |
# ---------------------------------------------------------------------------
|
| 72 |
# CONFIG --------------------------------------------------------------------
|
| 73 |
# ---------------------------------------------------------------------------
|
| 74 |
+
app_dataset = load_dataset(DATASET_PATH, split="train", token = tokens.get("HF_TOKEN")).to_pandas()
|
| 75 |
|
|
|
|
| 76 |
dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
|
| 77 |
maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
|
| 78 |
maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
|