Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import hashlib | |
| import sys | |
def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is hashed in fixed-size chunks so that a large model file is
    never loaded into memory all at once.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()


def download_genipapo_model(expected_md5=None):
    """Make sure the Genipapo model file exists at ``models/genipapo.pt``.

    If the file is already present it is verified against an MD5 checksum
    when one is known; a matching file is kept, a mismatching one is deleted
    and downloaded again. If no checksum is known, the existing file is
    accepted as-is.

    Args:
        expected_md5: Optional hexadecimal MD5 digest the model file must
            match. When omitted, a module-level ``model_checksum`` is used
            if the module defines one; otherwise verification is skipped.

    Raises:
        SystemExit: With exit code 1 when the server does not answer with
            HTTP 200.
        requests.RequestException: On connection errors or timeouts.
    """
    # Direct download URL from GitHub Releases
    model_url = 'https://github.com/bryankhelven/genipapo/releases/download/Publishing/genipapo.pt'
    model_dir = os.path.join('models')
    model_path = os.path.join(model_dir, 'genipapo.pt')

    if expected_md5 is None:
        # Earlier versions compared against a global ``model_checksum`` that
        # is not guaranteed to exist; look it up defensively instead of
        # raising NameError when it is missing.
        expected_md5 = globals().get('model_checksum')

    os.makedirs(model_dir, exist_ok=True)

    if os.path.exists(model_path):
        if expected_md5 is None:
            print("Genipapo model already exists. No checksum available; skipping verification.")
            return
        print("Genipapo model already exists. Verifying checksum...")
        if _md5_of_file(model_path) == expected_md5.lower():
            print("Checksum verified. Model is ready to use.")
            return
        print("Checksum mismatch. Redownloading the model...")
        os.remove(model_path)

    print("Downloading Genipapo model...")
    # Download to a temporary name and rename at the end, so an interrupted
    # transfer never leaves a truncated file at the final model path.
    partial_path = model_path + '.part'
    try:
        with requests.get(model_url, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                print("Failed to download the model. Please check the URL.")
                sys.exit(1)
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, model_path)
    finally:
        # After a successful rename the partial file no longer exists, so
        # this only cleans up after a failed or aborted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download completed. Model is ready to use.")
# Directory where the downloaded resources are stored
RESOURCE_DIR = "stanza_resources"
LANGUAGE = "pt"

# Required components, each mapped to its download URL
REQUIRED_COMPONENTS = dict(
    backward_charlm="https://huggingface.co/stanfordnlp/stanza-pt/resolve/main/models/backward_charlm/oscar2023.pt",
    forward_charlm="https://huggingface.co/stanfordnlp/stanza-pt/resolve/main/models/forward_charlm/oscar2023.pt",
    pretrain="https://huggingface.co/stanfordnlp/stanza-pt/resolve/main/models/pretrain/conll17.pt",
)
# Stream a remote file to disk (no progress reporting is performed)
def download_file(url, dest_path):
    """Download *url* and store its contents at *dest_path*.

    The body is streamed in 8 KiB chunks to a temporary ``.part`` file next
    to the destination and renamed into place only after the transfer has
    finished, so a failed download never leaves a truncated file at
    *dest_path*.

    Args:
        url: Address of the file to fetch.
        dest_path: Local path the downloaded file is written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection errors or timeouts.
    """
    partial_path = os.fspath(dest_path) + ".part"
    try:
        # The timeout keeps a stalled server from hanging the script forever.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        os.replace(partial_path, dest_path)
    finally:
        # The partial file is gone after a successful rename; anything left
        # here is the residue of a failed transfer.
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Download the specific resources this project needs
def download_specific_resources():
    """Download the resource index and every required language component.

    ``resources.json`` is saved directly under ``RESOURCE_DIR``. Each entry
    of ``REQUIRED_COMPONENTS`` is then saved as
    ``RESOURCE_DIR/LANGUAGE/<component>/model.pt``. Files are downloaded on
    every call; nothing is skipped when it already exists.
    """

    def ensure_directory(path):
        # Create the directory only when nothing exists at that path yet.
        if os.path.exists(path):
            return
        os.makedirs(path)

    ensure_directory(RESOURCE_DIR)

    # Fetch the `resources.json` index
    index_url = "https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json"
    index_path = os.path.join(RESOURCE_DIR, "resources.json")
    print("Baixando resources.json...")
    download_file(index_url, index_path)

    # Base directory for the language's resources
    language_root = os.path.join(RESOURCE_DIR, LANGUAGE)
    ensure_directory(language_root)

    # Fetch each required component into its own sub-directory
    for component in REQUIRED_COMPONENTS:
        source_url = REQUIRED_COMPONENTS[component]
        target_dir = os.path.join(language_root, component)
        os.makedirs(target_dir, exist_ok=True)
        component_path = os.path.join(target_dir, "model.pt")
        print(f"Baixando {component}...")
        download_file(source_url, component_path)
        print(f"{component} baixado para {component_path}")

    print("Download concluído. Recursos disponíveis em:", RESOURCE_DIR)
# Script entry point: fetch the Genipapo model, then the Stanza resources.
if __name__ == '__main__':
    for step in (download_genipapo_model, download_specific_resources):
        step()