Spaces:

Czjun
/

Transformer

Sleeping

App Files Files Community

czjun committed on Apr 9

Commit

897c2d5

1 Parent(s): 9704503

feat: 添加错误处理和模型评估，优化摘要生成逻辑

Browse files

Files changed (3) hide show

README.md +4 -0
__pycache__/app.cpython-310.pyc +0 -0
app.py +34 -19

README.md CHANGED Viewed

@@ -9,3 +9,7 @@ license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+To force a specific transformer model in Spaces, set the `MODEL_NAME` environment variable, for example:
+`IDEA-CCNL/Randeng-T5-Char-57M-MultiTask-Chinese`

__pycache__/app.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -26,6 +26,7 @@ class SummaryOutput:
     summary: str
     backend: str
     used_target_length: Optional[int]
 class SummarizationConfig:
@@ -109,6 +110,7 @@ class HybridSummarizer:
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
             self.model.to(self.device)
             self.backend_name = "transformer"
             self.load_error = None
         except Exception as exc:
@@ -129,8 +131,14 @@ class HybridSummarizer:
                     backend="transformer",
                     used_target_length=target_length,
                 )
-            except Exception:
-                pass
         return SummaryOutput(
             summary=self.fallback.summarize(text, target_length=target_length),
             backend="fallback",
@@ -138,26 +146,29 @@ class HybridSummarizer:
         )
     def _summarize_with_transformer(self, text: str, target_length: int | None) -> str:
-        prompt = f"summarize: {text}"
         inputs = self.tokenizer(
             prompt,
             return_tensors="pt",
             truncation=True,
-            max_length=SummarizationConfig.max_source_length,
         )
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        max_new_tokens = max(32, min(256, int((target_length or 120) * 1.2)))
-        min_new_tokens = max(16, int(max_new_tokens * 0.4))
-        generated = self.model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            min_new_tokens=min_new_tokens,
-            num_beams=SummarizationConfig.num_beams,
-            no_repeat_ngram_size=SummarizationConfig.no_repeat_ngram_size,
-            length_penalty=SummarizationConfig.length_penalty,
-            early_stopping=True,
-        )
-        return self.tokenizer.decode(generated[0], skip_special_tokens=True).strip()
 app = FastAPI(title="Transformer Summarizer Demo", version="1.0.0")
@@ -173,6 +184,7 @@ class SummarizeResponse(BaseModel):
     summary: str
     backend: str
     target_length: int | None
 @app.get("/health")
@@ -192,11 +204,13 @@ def summarize(req: SummarizeRequest):
         summary=result.summary,
         backend=result.backend,
         target_length=result.used_target_length,
     )
 @app.get("/")
 def root():
     html = """
     <!DOCTYPE html>
     <html lang="zh-CN">
@@ -294,6 +308,7 @@ def root():
           <p>这是一个基于 Transformer 的中文文本摘要演示系统。你可以通过下面两个按钮进入接口文档或检查服务状态，也可以直接调用摘要接口。</p>
           <p>当前模型：<code>{engine.model_name}</code></p>
           <p>当前后端：<code>{engine.backend_name}</code></p>
           <div class="btns">
             <a class="btn primary" href="/docs" target="_blank" rel="noreferrer">打开接口文档</a>
@@ -309,9 +324,9 @@ def root():
   "text": "这里放一段较长的中文文本",
   "target_length": 120
 }</code></pre>
-            <p>4. 点击 <code>Execute</code> 后查看返回的摘要结果。</p>
-            <p>5. 如果想确认服务是否正常，可点击 <code>检查服务状态</code>，返回 <code>ok</code> 即表示运行正常。</p>
-            <p>6. 如果健康检查里的 <code>backend</code> 仍然是 <code>fallback</code>，说明 Transformer 模型没有成功加载，请先查看 <code>load_error</code> 的原因。</p>
             <div class="meta">
               提示：如果文本里有换行，请确保是合法 JSON。建议直接在 Swagger 页面提交，避免手写 JSON 出错。
             </div>

     summary: str
     backend: str
     used_target_length: Optional[int]
+    error: Optional[str] = None
 class SummarizationConfig:
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
             self.model.to(self.device)
+            self.model.eval()
             self.backend_name = "transformer"
             self.load_error = None
         except Exception as exc:
                     backend="transformer",
                     used_target_length=target_length,
                 )
+            except Exception as exc:
+                logger.exception("Transformer generation failed")
+                return SummaryOutput(
+                    summary=self.fallback.summarize(text, target_length=target_length),
+                    backend="fallback",
+                    used_target_length=target_length,
+                    error=f"{type(exc).__name__}: {exc}",
+                )
         return SummaryOutput(
             summary=self.fallback.summarize(text, target_length=target_length),
             backend="fallback",
         )
     def _summarize_with_transformer(self, text: str, target_length: int | None) -> str:
+        prompt = text
         inputs = self.tokenizer(
             prompt,
             return_tensors="pt",
             truncation=True,
+            max_length=512,
         )
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        max_new_tokens = max(48, min(192, int((target_length or 120) * 1.1)))
+        with torch.no_grad():
+            generated = self.model.generate(
+                **inputs,
+                max_new_tokens=max_new_tokens,
+                num_beams=2,
+                no_repeat_ngram_size=3,
+                length_penalty=1.0,
+                early_stopping=True,
+            )
+        return self.tokenizer.decode(
+            generated[0],
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=True,
+        ).strip()
 app = FastAPI(title="Transformer Summarizer Demo", version="1.0.0")
     summary: str
     backend: str
     target_length: int | None
+    error: str | None = None
 @app.get("/health")
         summary=result.summary,
         backend=result.backend,
         target_length=result.used_target_length,
+        error=result.error,
     )
 @app.get("/")
 def root():
+    error_note = f"<p>最近一次生成错误：<code>{engine.load_error}</code></p>" if engine.load_error else ""
     html = """
     <!DOCTYPE html>
     <html lang="zh-CN">
           <p>这是一个基于 Transformer 的中文文本摘要演示系统。你可以通过下面两个按钮进入接口文档或检查服务状态，也可以直接调用摘要接口。</p>
           <p>当前模型：<code>{engine.model_name}</code></p>
           <p>当前后端：<code>{engine.backend_name}</code></p>
+          """ + error_note + """
           <div class="btns">
             <a class="btn primary" href="/docs" target="_blank" rel="noreferrer">打开接口文档</a>
   "text": "这里放一段较长的中文文本",
   "target_length": 120
 }</code></pre>
+          <p>4. 点击 <code>Execute</code> 后查看返回的摘要结果。</p>
+          <p>5. 如果想确认服务是否正常，可点击 <code>检查服务状态</code>，返回 <code>ok</code> 即表示运行正常。</p>
+          <p>6. 如果接口返回 <code>backend=fallback</code>，请查看响应里的 <code>error</code> 字段，这表示 Transformer 生成阶段失败，系统才会自动切回备用摘要。</p>
             <div class="meta">
               提示：如果文本里有换行，请确保是合法 JSON。建议直接在 Swagger 页面提交，避免手写 JSON 出错。
             </div>