"""Gradio app that extracts named entities from abstracts and ranks them.

The user pastes CSV text containing an ``abstract`` column. Each abstract is
sent to a Hugging Face hosted token-classification (NER) model, the returned
entity words are counted, and the most frequent ones are flagged as "hot".
"""

import os
import time
from collections import Counter
from io import StringIO

import gradio as gr
import pandas as pd
import requests

# =====================================================
# CONFIGURATION
# =====================================================

MODEL_ID = "dslim/bert-base-NER"
# The Hugging Face router exposes the serverless Inference API under the
# "hf-inference/models/<model id>" path; posting to "<host>/<model id>"
# directly does not reach the model.
API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}"

HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if not HF_API_TOKEN:
    # Fail at import time so a missing secret is visible immediately.
    raise RuntimeError(
        "HF_API_TOKEN não encontrado. Configure em Settings → Secrets."
    )

HEADERS = {
    "Authorization": f"Bearer {HF_API_TOKEN}",
    "Content-Type": "application/json",
}

# Columns of every frame returned to the UI (results and messages alike).
_RESULT_COLUMNS = ["Term", "Frequency", "Hot"]
# Seconds to wait for one inference call before giving up on that abstract.
_REQUEST_TIMEOUT = 60
# Terms with this many characters or fewer are discarded.
_MAX_IGNORED_TERM_LENGTH = 2
# Terms whose frequency reaches this quantile are flagged as hot.
_HOT_QUANTILE = 0.90


# =====================================================
# HELPERS
# =====================================================

def _message_frame(term, frequency=""):
    """Return a one-row frame carrying an error or status message."""
    return pd.DataFrame([[term, frequency, ""]], columns=_RESULT_COLUMNS)


def _fetch_entities(text):
    """Request NER entities for one abstract.

    Args:
        text: The abstract text to send as the model input.

    Returns:
        The list decoded from the API response, or an empty list when the
        request fails, the body is not JSON, or the payload is not a list
        (for example an ``{"error": ...}`` object). Failures are printed so
        they show up in the application logs; they never raise.
    """
    payload = {
        "inputs": text,
        "parameters": {"aggregation_strategy": "simple"},
        "options": {"wait_for_model": True},
    }
    try:
        response = requests.post(
            API_URL, headers=HEADERS, json=payload, timeout=_REQUEST_TIMEOUT
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        print("Erro na requisição:", exc)
        return []

    if not isinstance(result, list):
        # Error objects (model loading, auth, bad route) and any other
        # unexpected shape are logged instead of being dropped silently.
        print("Aviso da API:", result)
        return []
    return result


def _entity_terms(entities):
    """Yield normalized (stripped, lower-cased) entity words.

    Items that are not dicts, or whose ``word`` is not a string, are skipped,
    as are terms of two characters or fewer.
    """
    for entity in entities:
        if not isinstance(entity, dict):
            continue
        word = entity.get("word")
        if not isinstance(word, str):
            continue
        term = word.strip().lower()
        if len(term) > _MAX_IGNORED_TERM_LENGTH:
            yield term


# =====================================================
# MAIN FUNCTION
# =====================================================

def analyze_abstracts(csv_text):
    """Extract entity terms from the abstracts in *csv_text* and rank them.

    Args:
        csv_text: CSV content as a string; it must contain an ``abstract``
            column. Missing values in that column are ignored.

    Returns:
        A DataFrame with columns ``Term``, ``Frequency`` and ``Hot``, sorted
        by descending frequency. When the CSV cannot be parsed, the
        ``abstract`` column is missing, or no term is found, a one-row frame
        holding the corresponding message is returned instead.
    """
    # ---------- Read CSV ----------
    try:
        df = pd.read_csv(StringIO(csv_text))
    except Exception as e:
        # UI boundary: any parsing problem is reported to the user as a row.
        return _message_frame("Erro ao ler CSV", str(e))

    if "abstract" not in df.columns:
        return _message_frame("Erro", "Coluna 'abstract' não encontrada")

    # ---------- Process abstracts ----------
    all_terms = []
    for abstract in df["abstract"].dropna():
        # pandas may parse a cell as a number; such values are not JSON
        # serializable, so always send plain text.
        text = str(abstract).strip()
        if not text:
            continue
        all_terms.extend(_entity_terms(_fetch_entities(text)))

    if not all_terms:
        return _message_frame(
            "Nenhum termo médico encontrado (verifique logs da API)"
        )

    # ---------- Frequency ----------
    # most_common() is already sorted by descending count and keeps
    # first-seen order among ties, so the row order is deterministic.
    df_terms = pd.DataFrame(
        Counter(all_terms).most_common(), columns=["Term", "Frequency"]
    )

    # ---------- Hot terms ----------
    # NOTE(review): with ">=" every term is flagged hot when all
    # frequencies are equal, because the quantile equals that shared value.
    threshold = df_terms["Frequency"].quantile(_HOT_QUANTILE)
    df_terms["Hot"] = df_terms["Frequency"] >= threshold

    return df_terms


# =====================================================
# INTERFACE
# =====================================================

# NOTE(review): the description below mentions PubMedBERT, but MODEL_ID is
# "dslim/bert-base-NER" — confirm which model is intended.
interface = gr.Interface(
    fn=analyze_abstracts,
    inputs=gr.Textbox(
        lines=18,
        placeholder="Cole aqui seu CSV com colunas: date,abstract",
    ),
    outputs=gr.Dataframe(),
    title="Medical Hot Terms Analyzer",
    description=(
        "Extrai termos médicos de abstracts usando PubMedBERT (NER) "
        "e identifica hot terms (top 10%)."
    ),
)

if __name__ == "__main__":
    interface.launch()