Upload 9 files
- best_model/label_encoder.pkl +3 -0
- best_model/model.safetensors +3 -0
- best_model/requirements.txt +7 -0
- best_model/special_tokens_map.json +7 -0
- best_model/tokenizer_config.json +58 -0
- best_model/training_args.bin +3 -0
- best_model/vocab.txt +0 -0
- requirements.txt +7 -0
- streamlit_app.py +87 -0
best_model/label_encoder.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b18fdaa0f8bb34ed643d679d56a1091fa6553747e5389ac4940640909dd8d57
+size 3374
best_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af82a381817471667b142e55198705b2a852262e2353585e68c8f14547dcaff
+size 439155212
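
The binary artifacts in this commit (label_encoder.pkl and model.safetensors above, and training_args.bin further down) are tracked with Git LFS, so the diff shows only three-line pointer stubs: the LFS spec version, the SHA-256 of the blob, and its size in bytes (about 439 MB for the model weights). A minimal sketch of resolving one of these pointers to the real file, assuming the repository is on the Hugging Face Hub under a hypothetical repo id:

```python
# Sketch: download the LFS-backed weights through the Hub API.
# "your-username/nlp-disease" is a hypothetical repo id; substitute the real one.
# If this commit belongs to a Space rather than a model repo, also pass repo_type="space".
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="your-username/nlp-disease",
    filename="best_model/model.safetensors",
)
print(weights_path)  # local cache path to the ~439 MB safetensors file
```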
best_model/requirements.txt
ADDED
@@ -0,0 +1,7 @@
+transformers>=4.30.0
+altair
+pandas
+streamlit
+torch
+scikit-learn
+requests
best_model/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
best_model/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
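
The config above pins the standard bert-base-uncased behaviour: ids 0/100/101/102/103 map to [PAD]/[UNK]/[CLS]/[SEP]/[MASK], text is lowercased, and inputs are capped at 512 tokens. A small sketch of what this means in practice, assuming the best_model/ directory from this commit is available locally:

```python
# Sketch: the uploaded tokenizer files (vocab.txt, special_tokens_map.json,
# tokenizer_config.json) are self-contained, so they load directly from disk.
from transformers import BertTokenizer

tok = BertTokenizer.from_pretrained("best_model")
enc = tok("Fever, Cough, Headache")  # lowercased, per do_lower_case=true
print(enc["input_ids"])              # starts with 101 ([CLS]) and ends with 102 ([SEP])
print(tok.model_max_length)          # 512, from tokenizer_config.json above
```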
best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7119bd5212d6defb5990d470323a16b95a015f97af1161842fba1d73aa559185
+size 5240
best_model/vocab.txt
ADDED
The diff for this file is too large to render; see the raw diff.
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+transformers>=4.30.0
+altair
+pandas
+gradio
+torch
+scikit-learn
+requests
streamlit_app.py
ADDED
@@ -0,0 +1,87 @@
+import os
+import sys
+import streamlit as st
+from transformers import BertTokenizer, BertForSequenceClassification  # Using BertTokenizer here
+import torch
+import pickle
+import random
+from collections import defaultdict
+import requests
+
+# Define a function for downloading a file from GitHub
+def download_label_encoder():
+    url = "https://github.com/AxundovReyal/nlp-disease/raw/main/label_encoder.pkl"
+    headers = {
+        "Authorization": f"token {os.getenv('GITHUB_TOKEN')}"  # The GitHub personal access token must be set as an environment variable
+    }
+    response = requests.get(url, headers=headers)
+
+    if response.status_code == 200:
+        with open("label_encoder.pkl", "wb") as f:
+            f.write(response.content)
+        print("label_encoder.pkl was downloaded successfully.")
+    else:
+        raise Exception(f"The file could not be downloaded, error code: {response.status_code}")
+
+# Load the model and the label_encoder
+@st.cache_resource
+def load_model():
+    # Download the label_encoder from GitHub
+    download_label_encoder()
+
+    # Load the label encoder first
+    with open("label_encoder.pkl", "rb") as f:
+        label_encoder = pickle.load(f)
+
+    # Using BertTokenizer here instead of AutoTokenizer
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # BERT tokenizer
+    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))  # BERT model
+
+    model.eval()
+
+    return tokenizer, model, label_encoder
+
+tokenizer, model, label_encoder = load_model()
+
+st.title("Disease NLP Classifier")
+
+text = st.text_area("Enter your symptoms separated by commas (e.g. fever, cough, headache):")
+
+def predict(text_input):
+    inputs = tokenizer(text_input, return_tensors="pt", truncation=True, padding=True, max_length=128)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze()
+    return probs
+
+if st.button("Predict"):
+    if not text.strip():
+        st.warning("Please enter some symptoms!")
+    else:
+        symptoms = [s.strip() for s in text.split(",") if s.strip()]
+        if not symptoms:
+            st.warning("Please enter valid symptoms separated by commas!")
+        else:
+            agg_probs = defaultdict(float)
+            n_shuffles = 10
+            for _ in range(n_shuffles):
+                random.shuffle(symptoms)
+                shuffled_text = ", ".join(symptoms)
+                probs = predict(shuffled_text)
+                for i, p in enumerate(probs):
+                    agg_probs[i] += p.item()
+            for k in agg_probs:
+                agg_probs[k] /= n_shuffles
+            top_3 = sorted(agg_probs.items(), key=lambda x: x[1], reverse=True)[:3]
+
+            st.subheader("Top 3 Predicted Diseases (averaged over shuffled inputs):")
+            for idx, prob in top_3:
+                label = label_encoder.classes_[idx]  # Map the index back to its label name
+                st.write(f"**{label}** — Probability: `{prob * 100:.2f}%`")
+
+# Render port fix
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 8501))
+    sys.argv = ["streamlit", "run", "streamlit_app.py", f"--server.port={port}", "--server.address=0.0.0.0"]
+    from streamlit.web.cli import main
+    sys.exit(main())
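
Note that load_model() above instantiates bert-base-uncased with a freshly initialized classification head, so the fine-tuned weights uploaded in best_model/model.safetensors are never used. Since this commit does not include a config.json for the model, one way to use them is to build the architecture from the base config and load the uploaded state dict over it. A minimal sketch, assuming the best_model/ directory sits next to streamlit_app.py with its LFS files materialized:

```python
# Sketch: reuse the fine-tuned weights and label encoder from best_model/
# instead of the untrained head created in load_model() above.
import pickle

from safetensors.torch import load_file
from transformers import BertTokenizer, BertForSequenceClassification

with open("best_model/label_encoder.pkl", "rb") as f:  # local copy, no GitHub download needed
    label_encoder = pickle.load(f)

tokenizer = BertTokenizer.from_pretrained("best_model")  # vocab.txt + tokenizer_config.json
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_encoder.classes_),
    state_dict=load_file("best_model/model.safetensors"),  # fine-tuned weights from this commit
)
model.eval()
```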