e1732a364fed committed on
Commit
3a60eea
·
1 Parent(s): 26b720e

basic feature

Browse files
Files changed (2) hide show
  1. .gitignore +3 -0
  2. app.py +116 -3
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__/
2
+ .venv/
3
+ flagged/
app.py CHANGED
@@ -1,9 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def greet(name):
5
- return "Hello " + name + "!!"
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
9
  demo.launch()
 
1
def extract_body_text_by_string(input_string, max_len=512):
    """Condense *input_string* down to roughly *max_len* characters.

    A string no longer than *max_len* is simply returned stripped.
    Anything longer is sampled at three offsets — the head, the middle,
    and the tail — each sample being one third of *max_len* long; the
    stripped samples are concatenated and returned.
    """
    total = len(input_string)

    if total <= max_len:
        return input_string.strip()

    sample_len = max_len // 3
    # Starting offsets for the head, middle, and tail samples.
    starts = (0, total // 2, total - sample_len)

    return "".join(
        input_string[start : start + sample_len].strip() for start in starts
    )
17
+
18
+
19
+ import torch
20
+
21
# Pick the best available accelerator: CUDA first, then Apple MPS,
# falling back to plain CPU.
if torch.cuda.is_available():
    _device_name = "cuda"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"

DEVICE = torch.device(_device_name)
26
+
27
+ import numpy as np
28
+
29
+
30
def predict_text(text, model, tokenizer, max_len, device=DEVICE):
    """Classify *text* with a sequence-classification *model*.

    The text is tokenized (padded/truncated to *max_len*), run through
    the model without gradient tracking, and the argmax class is mapped
    to a verdict string: class 0 -> "ok", anything else -> "ban".
    """
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_len,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )

    ids = encoded["input_ids"].to(device)
    mask = encoded["attention_mask"].to(device)

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        result = model(ids, attention_mask=mask)
        scores = result.logits.cpu().numpy()
        predicted = np.argmax(scores, axis=1)[0]

    return "ok" if predicted == 0 else "ban"
51
+
52
+
53
  import gradio as gr
54
 
55
print("loading models...")
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
)

# Hub repo ids for the two fine-tuned BERT classifiers.
# (The originals used f-strings with no placeholders — ruff F541 — and
# repeated each literal twice; named constants remove the duplication.)
HEAD_MODEL_ID = "e1732a364fed/bert-geosite-classification-head-v1"
BODY_MODEL_ID = "e1732a364fed/bert-geosite-classification-body-v1"

head_tokenizer = BertTokenizer.from_pretrained(HEAD_MODEL_ID)
body_tokenizer = BertTokenizer.from_pretrained(BODY_MODEL_ID)
head_model = BertForSequenceClassification.from_pretrained(HEAD_MODEL_ID).to(DEVICE)
body_model = BertForSequenceClassification.from_pretrained(BODY_MODEL_ID).to(DEVICE)

# Inference only: switch off dropout / batch-norm training behavior.
head_model.eval()
body_model.eval()

print("loaded models...")
78
+
79
+
80
def func(head, body):
    """Run both classifiers: the head model on *head*, the body model on *body*.

    Returns a pair of verdict strings ("ok" or "ban"), one per input.
    """
    print("predicting head")

    head_verdict = predict_text(head, head_model, head_tokenizer, 512)
    print("predicting body")

    body_verdict = predict_text(body, body_model, body_tokenizer, 512)
    print("prediction done")

    return head_verdict, body_verdict
90
 
 
 
91
 
92
# Assemble the UI: two free-text inputs (HTTP response head and body)
# mapped through `func` to two verdict textboxes.
head_input = gr.Textbox(
    label="Head",
    info="http response head",
    lines=6,
    max_lines=10000000000,
    value="200 OK",
)
body_input = gr.Textbox(
    label="Body",
    info="http response body",
    lines=6,
    max_lines=10000000000,
    value="<body>The quick brown fox jumped over the lazy dogs.</body>",
)
head_output = gr.Textbox(
    label="Head",
    info="http response prediction",
)
body_output = gr.Textbox(
    label="Body",
    info="http response prediction",
)

demo = gr.Interface(
    fn=func,
    inputs=[head_input, body_input],
    outputs=[head_output, body_output],
)

demo.launch()