cmbtest committed on
Commit
8fa16c7
·
verified ·
1 Parent(s): b6b71c4

create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from collections import Counter
from io import StringIO

import gradio as gr
import pandas as pd
import requests
6
+
7
# ==============================
# CONFIGURATION
# ==============================
# The Hugging Face token is read from the HF_TOKEN environment variable so
# the secret never has to be written into the source file.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
API_URL = "https://api-inference.huggingface.co/models/rebeccakoganlee/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext-finetuned-ner"
# Attach a bearer Authorization header only when a token is configured;
# with no token the requests are sent without credentials.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
13
+
14
# ==============================
# ANALYSIS FUNCTION
# ==============================
# Upper bound, in seconds, on each inference request so that a stalled
# connection cannot block the UI indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30
_RESULT_COLUMNS = ["Term", "Frequency", "Hot"]


def _message_frame(term, frequency="", hot=""):
    """Return a one-row result table carrying a status or error message."""
    return pd.DataFrame([[term, frequency, hot]], columns=_RESULT_COLUMNS)


def _extract_terms(result):
    """Return the non-empty ``word`` values found in one decoded API response."""
    if not isinstance(result, list):
        # Anything that is not a list (e.g. an error object) carries no entities.
        return []
    entities = []
    for item in result:
        # NOTE(review): entities may arrive nested one level deeper for some
        # responses; flatten a single level so both shapes are accepted.
        if isinstance(item, list):
            entities.extend(item)
        else:
            entities.append(item)
    return [
        entity["word"]
        for entity in entities
        if isinstance(entity, dict) and entity.get("word")
    ]


def analyze_abstracts(csv_text):
    """Count the entity words the NER endpoint finds in pasted CSV abstracts.

    Args:
        csv_text: CSV content as text. It must contain an ``abstract``
            column; surrounding whitespace in header names is ignored.

    Returns:
        A DataFrame with columns ``Term``, ``Frequency`` and ``Hot``, ordered
        by descending frequency. ``Hot`` is True for terms whose frequency is
        at or above the 0.9 quantile of all frequencies. When the CSV cannot
        be read, the column is missing, or no term is found, a single-row
        DataFrame holding the corresponding message is returned instead.
    """
    try:
        df = pd.read_csv(StringIO(csv_text))
    except Exception as e:  # UI boundary: report any parse failure to the user
        return _message_frame("Erro ao ler CSV", str(e))

    # Tolerate headers pasted with padding, such as "date, abstract".
    df = df.rename(columns=lambda name: str(name).strip())
    if "abstract" not in df.columns:
        return _message_frame("Erro", "Coluna 'abstract' não encontrada")

    all_terms = []
    for abstract in df["abstract"]:
        # Missing cells are read as NaN; skip them and blank text rather than
        # spending a request on input that has nothing to tag.
        if not isinstance(abstract, str) or not abstract.strip():
            continue
        payload = {"inputs": abstract, "parameters": {"task": "token-classification"}}
        try:
            response = requests.post(
                API_URL,
                headers=HEADERS,
                json=payload,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            # Surface HTTP errors instead of silently dropping the abstract.
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: log the failure and move on to the next abstract.
            print("Erro na API:", e)
            continue
        all_terms.extend(_extract_terms(result))

    if not all_terms:
        return _message_frame("Nenhum termo encontrado")

    # most_common() orders by descending count and keeps first-seen order
    # among ties, so the resulting table is deterministic.
    term_df = pd.DataFrame(
        Counter(all_terms).most_common(), columns=["Term", "Frequency"]
    )

    # Flag the hot terms: frequency at or above the 0.9 quantile.
    threshold = term_df["Frequency"].quantile(0.9)
    term_df["Hot"] = term_df["Frequency"] >= threshold

    return term_df
58
+
59
# ==============================
# GRADIO INTERFACE
# ==============================
# Input component: a multi-line text box into which the CSV is pasted.
_csv_box = gr.Textbox(lines=15, placeholder="Cole aqui seu CSV com colunas: date,abstract")
# Output component: a table with the three result columns.
_terms_table = gr.Dataframe(headers=["Term", "Frequency", "Hot"])

iface = gr.Interface(
    title="Hot Terms Médicos",
    description="Cole o CSV com data e abstract. O app identifica termos médicos e mostra os hot terms (top 10% mais frequentes).",
    fn=analyze_abstracts,
    inputs=_csv_box,
    outputs=_terms_table,
)

# ==============================
# RUN APP
# ==============================
# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()