Spaces:
Sleeping
Sleeping
Updated app.py to clean the text input before sending it to the model
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from fastapi import FastAPI
|
| 3 |
from pydantic import BaseModel
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
@@ -26,10 +27,20 @@ except Exception as e:
|
|
| 26 |
class InputText(BaseModel):
|
| 27 |
text: str
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
@app.post("/summarize")
|
| 30 |
async def summarize(input: InputText):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
inputs = tokenizer(
|
| 32 |
-
|
| 33 |
return_tensors="pt",
|
| 34 |
max_length=16384,
|
| 35 |
truncation=True,
|
|
|
|
| 1 |
import os
|
| 2 |
+
import re
|
| 3 |
from fastapi import FastAPI
|
| 4 |
from pydantic import BaseModel
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
| 27 |
# Request body schema for the POST /summarize endpoint.
class InputText(BaseModel):
    # Text to be summarized.
    text: str
|
| 29 |
|
| 30 |
+
def clean_text(text: str) -> str:
    """Collapse every run of whitespace in *text* to one space and trim the ends.

    All Unicode whitespace is treated the same way: spaces, tabs, newlines,
    carriage returns, form feeds, vertical tabs, non-breaking spaces, etc.
    A single stray whitespace character is normalised just like a longer run,
    so the result never contains anything other than single ASCII spaces
    between words.

    Args:
        text: Raw input text, possibly containing line breaks or irregular
            spacing.

    Returns:
        The text with words separated by exactly one space and no leading or
        trailing whitespace. An empty or whitespace-only input yields "".
    """
    # str.split() with no separator splits on any whitespace run and discards
    # empty leading/trailing pieces, so joining with " " normalises in one pass.
    return " ".join(text.split())
|
| 35 |
+
|
| 36 |
@app.post("/summarize")
|
| 37 |
async def summarize(input: InputText):
|
| 38 |
+
|
| 39 |
+
cleaned_input = clean_text(input.text)
|
| 40 |
+
prompt = f"summarize: {cleaned_input}"
|
| 41 |
+
|
| 42 |
inputs = tokenizer(
|
| 43 |
+
prompt,
|
| 44 |
return_tensors="pt",
|
| 45 |
max_length=16384,
|
| 46 |
truncation=True,
|