|
|
import os |
|
|
import json |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
from huggingface_hub import hf_hub_download |
|
|
from tensorflow import keras |
|
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub repository the classifier files are downloaded from.
REPO_ID = "Bocklitz-Lab/lit2vec-subfield-classifier-model"

# Sentence-embedding model used to encode the input text.
EMBED_MODEL = "intfloat/e5-large-v2"

# Prefix prepended to the text before encoding, keyed by the text type
# offered in the UI.
TEXT_PREFIX = {
    "abstract": "abstract: ",
    "summary": "summary: ",
}

# Initial UI values: minimum score for a label to be selected, and how many
# top-scoring labels to list.
DEFAULT_THRESHOLD = 0.5
TOPK_DEFAULT = 5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fetch the classifier weights and the label mapping from the Hub repository.
MODEL_PATH = hf_hub_download(repo_id=REPO_ID, filename="mlp_model.h5")
LABEL_MAP_PATH = hf_hub_download(repo_id=REPO_ID, filename="label_mapping.json")

# JSON object keys are strings, so the class indices are converted back to
# int for lookup by position in the model output.
with open(LABEL_MAP_PATH, encoding="utf-8") as f:
    mapping = json.load(f)
INDEX_TO_LABEL = {
    int(index): label for index, label in mapping["index_to_label"].items()
}

# Loaded without compiling: the model is only used for prediction here.
MODEL = keras.models.load_model(MODEL_PATH, compile=False)

# Text encoder, pinned to CPU.
ENCODER = SentenceTransformer(EMBED_MODEL, device="cpu")
|
|
|
|
|
def encode_text(text: str, text_type: str = "abstract") -> np.ndarray:
    """Embed a single text so it can be fed to the classifier.

    Args:
        text: The text to embed.
        text_type: Key into ``TEXT_PREFIX``; its prefix is prepended to
            ``text``. An unknown key prepends nothing.

    Returns:
        The encoder output for the one-element batch, with normalized
        embeddings, cast to float32 (presumably shape ``(1, dim)`` —
        confirm against the encoder).
    """
    prefixed = TEXT_PREFIX.get(text_type, "") + text
    vectors = ENCODER.encode([prefixed], normalize_embeddings=True)
    return vectors.astype("float32")
|
|
|
|
|
def predict(text: str, text_type: str, threshold: float, topk: int):
    """Classify ``text`` and format the result for the three UI outputs.

    Args:
        text: Abstract or summary to classify. ``None`` or whitespace-only
            input short-circuits without running the model.
        text_type: Key into ``TEXT_PREFIX`` selecting the encoding prefix.
        threshold: A label is selected when its score is ``>= threshold``.
        topk: Number of best-scoring labels to list; coerced to ``int`` and
            raised to at least 1.

    Returns:
        A 3-tuple ``(selected, topk_text, table)``:

        * ``selected`` - comma-separated labels meeting the threshold, in
          class-index order, or ``"—"`` when none qualifies;
        * ``topk_text`` - one ``"label: score"`` line (3 decimals) per top-k
          label, best first;
        * ``table`` - ``[label, score]`` rows for every class, best first.

        For empty input the result is ``("", "", [])``.
    """
    text = (text or "").strip()
    if not text:
        return ("", "", [])

    X = encode_text(text, text_type=text_type)
    # The batch holds a single text, so keep only its row of scores.
    probs = MODEL.predict(X, verbose=0)[0]

    # Labels whose score reaches the decision threshold.
    pred_labels = [
        INDEX_TO_LABEL[i] for i, p in enumerate(probs) if p >= threshold
    ]
    pred_display = ", ".join(pred_labels) if pred_labels else "—"

    topk = max(1, int(topk))

    # Rank the classes once (descending score) and reuse the ranking for both
    # the top-k list and the full table instead of sorting twice.
    ranking = np.argsort(-probs)

    topk_display = "\n".join(
        f"{INDEX_TO_LABEL[i]}: {probs[i]:.3f}" for i in ranking[:topk]
    )
    # Cast to built-in float so the table holds plain Python numbers.
    table = [[INDEX_TO_LABEL[i], float(probs[i])] for i in ranking]

    return pred_display, topk_display, table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI layout: intro text, option row, input box, run button, and three outputs.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
        # Lit2Vec Subfield Classifier
        Enter a **chemistry abstract or summary**. The app encodes it with `e5-large-v2` and predicts one or more **subfields** using the MLP model.

        **Model:** `Bocklitz-Lab/lit2vec-subfield-classifier-model`
        **Encoder:** `intfloat/e5-large-v2`
        """
    )

    # Prediction options.
    with gr.Row():
        text_type = gr.Radio(
            choices=["abstract", "summary"],
            value="abstract",
            label="Text type (prefix used for encoding)",
        )
        threshold = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=DEFAULT_THRESHOLD,
            step=0.01,
            label="Decision threshold",
        )
        topk = gr.Slider(
            minimum=1,
            maximum=10,
            value=TOPK_DEFAULT,
            step=1,
            label="Top-K to display",
        )

    # Input text, pre-filled with an example abstract.
    input_box = gr.Textbox(
        label="Paste abstract / summary",
        placeholder="Paste your chemistry abstract here…",
        lines=12,
        value="Ultraviolet B (UVB; 290~320nm) irradiation-induced lipid peroxidation induces inflammatory responses that lead to skin wrinkle formation and epidermal thickening. Peroxisome proliferator-activated receptor (PPAR) α/γ dual agonists have the potential to be used as anti-wrinkle agents because they inhibit inflammatory response and lipid peroxidation. In this study, we evaluated the function of 2-bromo-4-(5-chloro-benzo[d]thiazol-2-yl) phenol (MHY 966), a novel synthetic PPAR α/γ dual agonist, and investigated its anti-inflammatory and anti-lipid peroxidation effects. The action of MHY 966 as a PPAR α/γ dual agonist was also determined in vitro by reporter gene assay. Additionally, 8-week-old melanin-possessing hairless mice 2 (HRM2) were exposed to 150 mJ/cm2 UVB every other day for 17 days and MHY 966 was simultaneously pre-treated every day for 17 days to investigate the molecular mechanisms involved. MHY 966 was found to stimulate the transcriptional activities of both PPAR α and γ. In HRM2 mice, we found that the skins of mice exposed to UVB showed significantly increased pro-inflammatory mediator levels (NF-κB, iNOS, and COX-2) and increased lipid peroxidation, whereas MHY 966 co-treatment down-regulated these effects of UVB by activating PPAR α and γ. Thus, the present study shows that MHY 966 exhibits beneficial effects on inflammatory responses and lipid peroxidation by simultaneously activating PPAR α and γ. The major finding of this study is that MHY 966 demonstrates potential as an agent against wrinkle formation associated with chronic UVB exposure.",
    )

    run_btn = gr.Button(value="Predict subfield(s)")

    # Text outputs side by side.
    with gr.Row():
        selected_labels = gr.Textbox(
            label="Predicted fields (thresholded)",
            lines=2,
        )
        topk_labels = gr.Textbox(label="Top-K (scores)", lines=6)

    # Read-only table with one row per class.
    scores_table = gr.Dataframe(
        headers=["Subfield", "Score"],
        datatype=["str", "number"],
        label="All scores (sorted)",
        interactive=False,
    )

    # Output order matches the 3-tuple returned by predict().
    run_btn.click(
        fn=predict,
        inputs=[input_box, text_type, threshold, topk],
        outputs=[selected_labels, topk_labels, scores_table],
    )
|
|
|
|
|
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|