Spaces:
Build error
Build error
| import os | |
| import time | |
| import requests | |
| import numpy as np | |
| from flask import Flask, render_template, request, send_file | |
| from rdkit import Chem | |
| from transformers import AutoModelForMaskedLM, AutoTokenizer | |
| from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
# Directories used by the Railway deployment.
bio_model_dir = "/app/modelsBioembed"  # persistent model storage
cvn_model_dir = "/app/models_folder"
UPLOAD_FOLDER = "/app/Samples"

# Make sure every working directory exists before anything writes to it.
for _dir_path in (bio_model_dir, cvn_model_dir, UPLOAD_FOLDER):
    os.makedirs(_dir_path, exist_ok=True)

# Point temporary-file and cache locations at directories under /app.
# NOTE(review): these are assigned after `transformers` and `bio_embeddings`
# are imported at the top of the file; a library that reads its cache
# settings at import time would not see them -- confirm.
os.environ.update(
    {
        "TMPDIR": bio_model_dir,
        "TEMP": bio_model_dir,
        "TMP": bio_model_dir,
        "NUMBA_CACHE_DIR": "/app/numba_cache",
        "TRANSFORMERS_CACHE": "/app/hf_cache",
    }
)

# Dropbox direct-download links (dl=1), keyed by the file name each download
# is stored under inside bio_model_dir.
DROPBOX_LINKS = {
    "pytorch_model.bin": "https://www.dropbox.com/scl/fi/b41t8c6ji7j6uk5y2jj8g/pytorch_model.bin?rlkey=kuuwkid36ugml560c4a465ilr&st=t60bfemx&dl=1",
    "config.json": "https://www.dropbox.com/scl/fi/js6czj3kfc4a5kshfkzie/config.json?rlkey=5oysq4ecilnan5tviuqe86v93&st=75zpce8h&dl=1",
    "tokenizer_config.json": "https://www.dropbox.com/scl/fi/x11poym6mueoxod7xb6f1/tokenizer_config.json?rlkey=s51pik2rkmqp1fu99qj9qaria&st=z9kkcxp7&dl=1",
    "vocab.txt": "https://www.dropbox.com/scl/fi/v6e2gn10ck4lpx4iv9kpe/vocab.txt?rlkey=dcu29g5ns4wtqdv0pkks0ehx1&st=qt187rhq&dl=1",
    "special_tokens_map.json": "https://www.dropbox.com/scl/fi/t3lvmp5x28d1zjac3j7ec/special_tokens_map.json?rlkey=z2xbompa54iu4y9qgb5bvmfc9&st=zrxlpjdt&dl=1",
}
| # # Function to download model files (earlier version that skipped files already present) | |
| # def download_model_files(): | |
| # for filename, url in DROPBOX_LINKS.items(): | |
| # file_path = os.path.join(bio_model_dir, filename) | |
| # if not os.path.exists(file_path): # Avoid re-downloading | |
| # print(f"Downloading {filename}...") | |
| # response = requests.get(url, stream=True) | |
| # if response.status_code == 200: | |
| # with open(file_path, "wb") as f: | |
| # for chunk in response.iter_content(chunk_size=1024): | |
| # f.write(chunk) | |
| # print(f"Downloaded: {filename}") | |
| # else: | |
| # print(f"Failed to download {filename}") | |
def download_model_files():
    """Download every file listed in ``DROPBOX_LINKS`` into ``bio_model_dir``.

    Existing files are always overwritten. Each download is streamed into a
    temporary ``<name>.part`` file and only moved onto the final path once the
    whole body has been received, so a failed or interrupted transfer never
    leaves a truncated model file behind.

    The function is best-effort: a non-200 response, a network error or a
    file-system error is reported on stdout and the loop continues with the
    next file. Nothing is returned.
    """
    for filename, url in DROPBOX_LINKS.items():
        file_path = os.path.join(bio_model_dir, filename)
        partial_path = file_path + ".part"
        print(f"Downloading {filename} (forcing overwrite)...")
        try:
            # (connect, read) timeouts keep a stalled connection from hanging
            # start-up forever; the context manager releases the connection.
            with requests.get(url, stream=True, timeout=(10, 300)) as response:
                if response.status_code != 200:
                    print(f"Failed to download {filename}")
                    continue
                with open(partial_path, "wb") as f:
                    # 1 MiB chunks: the model file is large, so 1 KiB chunks
                    # mean an excessive number of tiny writes.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            # Atomic swap: the final path only ever holds a complete file.
            os.replace(partial_path, file_path)
            print(f"Downloaded: {filename}")
        except (requests.RequestException, OSError) as exc:
            print(f"Failed to download {filename}: {exc}")
            try:
                os.remove(partial_path)
            except FileNotFoundError:
                pass  # no partial file was created, nothing to clean up


# Download models before starting
download_model_files()
| # # Load ProtTrans-BERT-BFD Model | |
| # print("Loading ProtTrans-BERT-BFD model...") | |
| # model = AutoModelForMaskedLM.from_pretrained(bio_model_dir) | |
| # tokenizer = AutoTokenizer.from_pretrained(bio_model_dir) | |
# Load the bio-embedding model once at import time. `embedder` stays None
# when loading fails, which generate_bio_embeddings() checks for.
embedder = None
try:
    print("Loading ProtTrans-BERT-BFD model...")
    embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir)
except Exception as load_error:
    print(f"Error loading ProtTrans-BERT-BFD model: {load_error}")
def generate_bio_embeddings(sequence):
    """Embed ``sequence`` with the module-level ``embedder``.

    The sequence is passed to ``embedder.embed`` and the result is reduced
    with ``embedder.reduce_per_protein``.

    Returns:
        A NumPy array reshaped to a single row (``reshape(1, -1)``), or
        ``None`` when the embedder failed to load or any step raised. The
        error is printed to stdout in the latter case.
    """
    if embedder is None:
        return None

    result = None
    try:
        raw_embedding = embedder.embed(sequence)
        reduced = embedder.reduce_per_protein(raw_embedding)
        result = np.array(reduced).reshape(1, -1)
    except Exception as err:
        print(f"Embedding Error: {err}")
    return result
def generate_smiles(sequence, n_samples=100):
    """Generate SMILES strings for ``sequence`` and write them to a file.

    Args:
        sequence: Input sequence handed to ``generate_bio_embeddings``.
        n_samples: Number of samples requested from the model.

    Returns:
        ``(path, elapsed_seconds)`` on success, where ``path`` is the
        ``SMILES_GENERATED.txt`` file inside ``UPLOAD_FOLDER`` holding one
        SMILES string per line; ``(None, error_message)`` when the embedding
        could not be produced or no SMILES string was obtained.
    """
    started_at = time.time()

    embedding = generate_bio_embeddings(sequence)
    if embedding is None:
        return None, "Embedding generation failed!"

    # A fresh builder is created from cvn_model_dir on every call.
    builder = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None)
    graphs = builder.sample(n_samples, c=embedding[0], output_type='graph')

    # Discard samples the model returned as None.
    usable_graphs = []
    for graph in graphs:
        if graph is not None:
            usable_graphs.append(graph)

    # Convert the remaining graphs, skipping any that yield no molecule.
    smiles_list = []
    for mol in get_mol_from_graph_list(usable_graphs, sanitize=True):
        if mol is not None:
            smiles_list.append(Chem.MolToSmiles(mol))

    if not smiles_list:
        return None, "No valid SMILES generated!"

    filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt")
    with open(filename, "w") as out_file:
        out_file.write("\n".join(smiles_list))

    return filename, time.time() - started_at
# Flask web app
app = Flask(__name__)


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the main page and handle sequence submissions.

    GET renders ``index.html`` as-is. POST reads the ``sequence`` form field,
    runs ``generate_smiles`` on it and renders ``index.html`` with either an
    error message or the success message, output file path and elapsed time.
    """
    if request.method == "POST":
        # .get() so a request without the field reaches the validation
        # message below instead of failing on the missing key.
        sequence = request.form.get("sequence", "").strip()
        if not sequence:
            return render_template("index.html", message="Please enter a valid sequence.")
        file_path, result = generate_smiles(sequence)
        if file_path is None:
            # On failure `result` carries the error text.
            return render_template("index.html", message=f"Error: {result}")
        # On success `result` is the elapsed time in seconds.
        return render_template(
            "index.html",
            message="SMILES generated successfully!",
            file_path=file_path,
            time_taken=result,
        )
    return render_template("index.html")
@app.route("/download")
def download_file():
    """Send the most recently generated SMILES file as an attachment.

    Responds with a 404 and an error message on ``index.html`` when no
    ``SMILES_GENERATED.txt`` exists in ``UPLOAD_FOLDER`` yet.
    """
    file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt")
    if not os.path.isfile(file_path):
        # Nothing has been generated yet; answer 404 rather than letting
        # send_file fail on the missing path.
        return render_template("index.html", message="Error: No generated SMILES file is available yet."), 404
    return send_file(file_path, as_attachment=True)
# Run the Flask app when the file is executed directly.
if __name__ == "__main__":
    # Honour a platform-provided PORT when set; otherwise keep port 7860.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))
| # import os | |
| # import time | |
| # import requests | |
| # import numpy as np | |
| # import subprocess | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # DROPBOX LINKS FOR MODEL FILES | |
| # DROPBOX_LINKS = { | |
| # "pytorch_model.bin": "https://www.dropbox.com/scl/fi/b41t8c6ji7j6uk5y2jj8g/pytorch_model.bin?rlkey=kuuwkid36ugml560c4a465ilr&st=t60bfemx&dl=1", | |
| # "config.json": "https://www.dropbox.com/scl/fi/js6czj3kfc4a5kshfkzie/config.json?rlkey=5oysq4ecilnan5tviuqe86v93&st=75zpce8h&dl=1", | |
| # "tokenizer_config.json": "https://www.dropbox.com/scl/fi/x11poym6mueoxod7xb6f1/tokenizer_config.json?rlkey=s51pik2rkmqp1fu99qj9qaria&st=z9kkcxp7&dl=1", | |
| # "vocab.txt": "https://www.dropbox.com/scl/fi/v6e2gn10ck4lpx4iv9kpe/vocab.txt?rlkey=dcu29g5ns4wtqdv0pkks0ehx1&st=qt187rhq&dl=1", | |
| # "special_tokens_map.json": "https://www.dropbox.com/scl/fi/t3lvmp5x28d1zjac3j7ec/special_tokens_map.json?rlkey=z2xbompa54iu4y9qgb5bvmfc9&st=zrxlpjdt&dl=1" | |
| # } | |
| # # LOCAL DIRECTORIES | |
| # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") | |
| # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # os.environ["TMPDIR"] = bio_model_dir | |
| # os.environ["TEMP"] = bio_model_dir | |
| # os.environ["TMP"] = bio_model_dir | |
| # # FUNCTION TO DOWNLOAD FILES FROM DROPBOX | |
| # for file_name, url in DROPBOX_LINKS.items(): | |
| # file_path = os.path.join(bio_model_dir, file_name) | |
| # if not os.path.exists(file_path): | |
| # print(f"Downloading {file_name} from Dropbox...") | |
| # subprocess.run(["wget", "-O", file_path, url], check=True) | |
| # print(f"{file_name} downloaded!") | |
| # # BIO-EMBEDDING MODEL LOADING | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # except Exception as e: | |
| # print(f"Error loading ProtTrans-BERT-BFD model: {e}") | |
| # embedder = None | |
| # def generate_bio_embeddings(sequence): | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # app = Flask(__name__) | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000, debug=True) | |
| # import os | |
| # import time | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # # DIRECTORIES | |
| # # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings | |
| # # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder | |
| # #bio_model_dir = os.getenv("BIO_MODEL_DIR", "modelsBioembed") | |
| # bio_model_dir = "/app/modelsBioembed" | |
| # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.environ["TMPDIR"] = bio_model_dir | |
| # os.environ["TEMP"] = bio_model_dir | |
| # os.environ["TMP"] = bio_model_dir | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # app = Flask(__name__) | |
| # # model_path = os.path.join(bio_model_dir, "pytorch_model.bin") | |
| # # if not os.path.exists(model_path): | |
| # # print("Downloading ProtTrans-BERT-BFD model...") | |
| # # AutoModel.from_pretrained("Rostlab/prot_bert_bfd", low_cpu_mem_usage=True).save_pretrained(bio_model_dir) | |
| # # BIO-EMBEDDING MODEL LOADING | |
| # try: | |
| # print("Loading Model") | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # except Exception as e: | |
| # print(f"Error loading ProtTrans-BERT-BFD model: {e}") | |
| # embedder = None | |
| # def generate_bio_embeddings(sequence): | |
| # """Generate bio-embeddings for a given protein sequence.""" | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # """Generate SMILES from a protein sequence.""" | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # TRAINED CVanilla_RNN_Builder MODEL LOADING | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # MOLECULAR GRAPH GENERATION | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # CONVERSION TO SMILES | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # SAVING TO FILE | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000) | |
| #MAIN | |
| # import os | |
| # import time | |
| # import requests | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # HUGGING FACE MODEL REPO (Replace with your actual Hugging Face username) | |
| # MODEL_BASE_URL = "https://huggingface.co/Bhanushray/protein-smiles-model/tree/main" | |
| # # REQUIRED MODEL FILES | |
| # MODEL_FILES = [ | |
| # "pytorch_model.bin", | |
| # "config.json", | |
| # "tokenizer_config.json", | |
| # "vocab.txt", | |
| # "special_tokens_map.json" | |
| # ] | |
| # # DIRECTORIES | |
| # bio_model_dir = os.getenv("BIO_MODEL_DIR", "modelsBioembed") | |
| # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") | |
| # # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings | |
| # # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.environ["TMPDIR"] = bio_model_dir | |
| # os.environ["TEMP"] = bio_model_dir | |
| # os.environ["TMP"] = bio_model_dir | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # app = Flask(__name__) | |
| # # DOWNLOAD MODEL FILES IF MISSING | |
| # for file_name in MODEL_FILES: | |
| # file_path = os.path.join(bio_model_dir, file_name) | |
| # if not os.path.exists(file_path): | |
| # print(f"Downloading {file_name} ...") | |
| # response = requests.get(MODEL_BASE_URL + file_name, stream=True) | |
| # with open(file_path, "wb") as f: | |
| # for chunk in response.iter_content(chunk_size=1024): | |
| # f.write(chunk) | |
| # print(f"{file_name} downloaded!") | |
| # # BIO-EMBEDDING MODEL LOADING | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # except Exception as e: | |
| # print(f"Error loading ProtTrans-BERT-BFD model: {e}") | |
| # embedder = None | |
| # def generate_bio_embeddings(sequence): | |
| # """Generate bio-embeddings for a given protein sequence.""" | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # """Generate SMILES from a protein sequence.""" | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # LOAD TRAINED CVanilla_RNN_Builder MODEL | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # MOLECULAR GRAPH GENERATION | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # CONVERT TO SMILES | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # SAVE TO FILE | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000, debug=True) | |
| # import os | |
| # import time | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # DIRECTORIES | |
| # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings | |
| # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.environ["TMPDIR"] = bio_model_dir | |
| # os.environ["TEMP"] = bio_model_dir | |
| # os.environ["TMP"] = bio_model_dir | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # app = Flask(__name__) | |
| # model_path = os.path.join(bio_model_dir, "pytorch_model.bin") | |
| # if not os.path.exists(model_path): | |
| # print("Downloading ProtTrans-BERT-BFD model...") | |
| # AutoModel.from_pretrained("Rostlab/prot_bert_bfd", low_cpu_mem_usage=True).save_pretrained(bio_model_dir) | |
| # # BIO-EMBEDDING MODEL LOADING | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # except Exception as e: | |
| # print(f"Error loading ProtTrans-BERT-BFD model: {e}") | |
| # embedder = None | |
| # def generate_bio_embeddings(sequence): | |
| # """Generate bio-embeddings for a given protein sequence.""" | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # """Generate SMILES from a protein sequence.""" | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # TRAINED CVanilla_RNN_Builder MODEL LOADING | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # MOLECULAR GRAPH GENERATION | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # CONVERSION TO SMILES | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # SAVING TO FILE | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000,debug=True) | |
| # import os | |
| # import time | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # from huggingface_hub import hf_hub_download # Import for direct file download | |
| # # Define directories for different models | |
| # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings | |
| # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder | |
| # # Ensure directories exist | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # app = Flask(__name__) | |
| # # Download only the required pytorch_model.bin file | |
| # model_filename = "pytorch_model.bin" | |
| # model_path = os.path.join(bio_model_dir, model_filename) | |
| # if not os.path.exists(model_path): | |
| # print("Downloading pytorch_model.bin from Hugging Face...") | |
| # hf_hub_download(repo_id="Rostlab/prot_bert_bfd", filename=model_filename, local_dir=bio_model_dir) | |
| # # Load bio-embedding model once | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # def generate_bio_embeddings(sequence): | |
| # """Generate bio-embeddings for a given protein sequence.""" | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # """Generate SMILES from a protein sequence.""" | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000, debug=True) | |
| # import os | |
| # import time | |
| # import requests | |
| # import numpy as np | |
| # import gdown # NEW: For Google Drive downloads | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from transformers import AutoModel | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # REPLACE WITH YOUR GOOGLE DRIVE FILE IDs | |
| # GDRIVE_FILE_IDS = { | |
| # "pytorch_model.bin": "11g7bAXYNxlPsnwC8_qsUIZITAjG85JXb", # Replace with actual ID | |
| # "config.json": "1ZfuhTnEuKAI1Z92m1QnDTOEQYNe9y24E", | |
| # "tokenizer_config.json": "1r4ncUsWBNQZVKp4zw97DLTf0AgRUiuFc", | |
| # "vocab.txt": "1G1UQIGMHvCC3OokCG1tl-cTxjIVqw04w", | |
| # "special_tokens_map.json": "1pINnV2P1eBmaC7X0A52UhjrmlJgzxqbl" | |
| # } | |
| # # LOCAL DIRECTORIES | |
| # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings | |
| # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.environ["TMPDIR"] = bio_model_dir | |
| # os.environ["TEMP"] = bio_model_dir | |
| # os.environ["TMP"] = bio_model_dir | |
| # UPLOAD_FOLDER = "Samples" | |
| # os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| # app = Flask(__name__) | |
| # # DOWNLOAD MODEL FILES IF MISSING | |
| # for file_name, file_id in GDRIVE_FILE_IDS.items(): | |
| # file_path = os.path.join(bio_model_dir, file_name) | |
| # if not os.path.exists(file_path): | |
| # print(f"Downloading {file_name} from Google Drive...") | |
| # gdown.download(f"https://drive.google.com/uc?id={file_id}", file_path, quiet=False) | |
| # print(f"{file_name} downloaded!") | |
| # # BIO-EMBEDDING MODEL LOADING | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # except Exception as e: | |
| # print(f"Error loading ProtTrans-BERT-BFD model: {e}") | |
| # embedder = None | |
| # def generate_bio_embeddings(sequence): | |
| # """Generate bio-embeddings for a given protein sequence.""" | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # def generate_smiles(sequence, n_samples=100): | |
| # """Generate SMILES from a protein sequence.""" | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # LOAD TRAINED CVanilla_RNN_Builder MODEL | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # MOLECULAR GRAPH GENERATION | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # CONVERT TO SMILES | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # SAVE TO FILE | |
| # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000, debug=True) | |
| # import os | |
| # import time | |
| # import gdown | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # DIRECTORIES | |
| # bio_model_dir = "/app/modelsBioembed" | |
| # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") | |
| # upload_folder = "Samples" | |
| # # Create directories if they don't exist | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.makedirs(upload_folder, exist_ok=True) | |
| # # Google Drive file IDs for the model files | |
| # MODEL_FILES = { | |
| # "pytorch_model.bin": "1Z9XWk-kP5yrBRdBF_mQPQsM8drqQXafJ", | |
| # "config.json": "1adE428T5ZWeosoLsBeX7sVnn6m4VvVgL", | |
| # "tokenizer_config.json": "1USvLAZ3dM4TzVSRLjINk2_W989k1HDQ0", | |
| # "vocab.txt": "1tsdesfbr61UyLShV0ojvsXOp6VJ9Exrt", | |
| # "special_tokens_map.json": "1ChCwdz0NH8ODasqscGwCS9mY7urhQte2", | |
| # } | |
| # # Function to download missing files from Google Drive | |
| # def download_model_files(): | |
| # for filename, file_id in MODEL_FILES.items(): | |
| # file_path = os.path.join(bio_model_dir, filename) | |
| # if not os.path.exists(file_path): | |
| # print(f"Downloading {filename} from Google Drive...") | |
| # gdown.download(f"https://drive.google.com/uc?id={file_id}", file_path, quiet=False) | |
| # # Download required model files | |
| # download_model_files() | |
| # print("All model files are ready!") | |
| # # Load the ProtTrans-BERT-BFD Model | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # print("ProtTrans-BERT-BFD model loaded successfully!") | |
| # except Exception as e: | |
| # print(f"Error loading model: {e}") | |
| # embedder = None | |
| # # Function to generate protein embeddings | |
| # def generate_bio_embeddings(sequence): | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # # Function to generate SMILES from a protein sequence | |
| # def generate_smiles(sequence, n_samples=100): | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # Load the trained CVanilla_RNN_Builder model | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # Generate molecular graphs | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # Convert to SMILES format | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # Save SMILES to a file | |
| # filename = os.path.join(upload_folder, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # # Initialize Flask App | |
| # app = Flask(__name__) | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(upload_folder, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000) | |
| # # --- Archived draft: model files fetched from Google Drive with requests --- | |
| # import os | |
| # import time | |
| # import requests | |
| # import numpy as np | |
| # from flask import Flask, render_template, request, send_file | |
| # from rdkit import Chem | |
| # from bio_embeddings.embed import ProtTransBertBFDEmbedder | |
| # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list | |
| # # DIRECTORIES | |
| # bio_model_dir = "/app/modelsBioembed" | |
| # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") | |
| # upload_folder = "Samples" | |
| # # Create directories if they don't exist | |
| # os.makedirs(bio_model_dir, exist_ok=True) | |
| # os.makedirs(cvn_model_dir, exist_ok=True) | |
| # os.makedirs(upload_folder, exist_ok=True) | |
| # # Google Drive file IDs for the model files | |
| # MODEL_FILES = { | |
| # "pytorch_model.bin": "1Z9XWk-kP5yrBRdBF_mQPQsM8drqQXafJ", | |
| # "config.json": "1adE428T5ZWeosoLsBeX7sVnn6m4VvVgL", | |
| # "tokenizer_config.json": "1USvLAZ3dM4TzVSRLjINk2_W989k1HDQ0", | |
| # "vocab.txt": "1tsdesfbr61UyLShV0ojvsXOp6VJ9Exrt", | |
| # "special_tokens_map.json": "1ChCwdz0NH8ODasqscGwCS9mY7urhQte2", | |
| # } | |
| # # Function to download a file from Google Drive | |
| # def download_file_from_google_drive(file_id, destination): | |
| # URL = f"https://drive.google.com/uc?export=download&id={file_id}" | |
| # session = requests.Session() | |
| # response = session.get(URL, stream=True) | |
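| # # Check if the request was successful | |
| # # NOTE(review): a 200 status alone may not mean the file was received; for large files Google Drive can answer with an HTML confirmation page instead - confirm before reusing | |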
| # if response.status_code == 200: | |
| # with open(destination, "wb") as f: | |
| # for chunk in response.iter_content(chunk_size=128): | |
| # f.write(chunk) | |
| # print(f"Downloaded {destination}") | |
| # else: | |
| # print(f"Failed to download {destination}") | |
| # # Function to download missing files from Google Drive | |
| # def download_model_files(): | |
| # for filename, file_id in MODEL_FILES.items(): | |
| # file_path = os.path.join(bio_model_dir, filename) | |
| # if not os.path.exists(file_path): | |
| # print(f"Downloading {filename} from Google Drive...") | |
| # download_file_from_google_drive(file_id, file_path) | |
| # # Download required model files | |
| # download_model_files() | |
| # print("All model files are ready!") | |
| # # Load the ProtTrans-BERT-BFD Model | |
| # try: | |
| # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) | |
| # print("ProtTrans-BERT-BFD model loaded successfully!") | |
| # except Exception as e: | |
| # print(f"Error loading model: {e}") | |
| # embedder = None | |
| # # Function to generate protein embeddings | |
| # def generate_bio_embeddings(sequence): | |
| # if embedder is None: | |
| # return None | |
| # try: | |
| # embedding_protein = embedder.embed(sequence) | |
| # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) | |
| # return np.array(embedding_per_protein).reshape(1, -1) | |
| # except Exception as e: | |
| # print(f"Embedding Error: {e}") | |
| # return None | |
| # # Function to generate SMILES from a protein sequence | |
| # def generate_smiles(sequence, n_samples=100): | |
| # start_time = time.time() | |
| # protein_embedding = generate_bio_embeddings(sequence) | |
| # if protein_embedding is None: | |
| # return None, "Embedding generation failed!" | |
| # # Load the trained CVanilla_RNN_Builder model | |
| # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) | |
| # # Generate molecular graphs | |
| # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') | |
| # valid_samples = [sample for sample in samples if sample is not None] | |
| # # Convert to SMILES format | |
| # smiles_list = [ | |
| # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None | |
| # ] | |
| # if not smiles_list: | |
| # return None, "No valid SMILES generated!" | |
| # # Save SMILES to a file | |
| # filename = os.path.join(upload_folder, "SMILES_GENERATED.txt") | |
| # with open(filename, "w") as file: | |
| # file.write("\n".join(smiles_list)) | |
| # elapsed_time = time.time() - start_time | |
| # return filename, elapsed_time | |
| # # Initialize Flask App | |
| # app = Flask(__name__) | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def index(): | |
| # if request.method == "POST": | |
| # sequence = request.form["sequence"].strip() | |
| # if not sequence: | |
| # return render_template("index.html", message="Please enter a valid sequence.") | |
| # file_path, result = generate_smiles(sequence) | |
| # if file_path is None: | |
| # return render_template("index.html", message=f"Error: {result}") | |
| # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) | |
| # return render_template("index.html") | |
| # @app.route("/download") | |
| # def download_file(): | |
| # file_path = os.path.join(upload_folder, "SMILES_GENERATED.txt") | |
| # return send_file(file_path, as_attachment=True) | |
| # if __name__ == "__main__": | |
| # app.run(host="0.0.0.0", port=8000) | |