# Provenance (hosting-page residue, kept as comments so the file parses):
# Yousuf-Islam — "Rename app.py to main.py" — commit acf5a33 (verified)
import re
import string
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
# --- 1. Define the Cleaning Function (No Pandas needed) ---
def clean_text(text: str) -> str:
    """
    Comprehensive text cleaning for Bengali text.

    Strips URLs, emails, @mentions/#hashtags, supplementary-plane emojis
    and punctuation (ASCII + Bengali dari/double dari + typographic
    quotes), then normalizes whitespace. Anything shorter than 2
    characters after cleaning is treated as pure noise and mapped to "".

    Optimized for API usage (no pandas dependency).

    Args:
        text: Raw input string; non-string or empty input yields "".

    Returns:
        The cleaned, whitespace-normalized string, or "" for noise-only input.
    """
    if not text or not isinstance(text, str):
        return ""
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove Emails
    text = re.sub(r'\S+@\S+', '', text)
    # Remove Mentions/Hashtags
    text = re.sub(r'@\w+|#\w+', '', text)
    # Remove emojis in the supplementary planes (U+10000 and above).
    # NOTE(review): BMP symbols/emoji (e.g. U+2600-U+27BF) are NOT removed
    # by this range — confirm whether that is intended.
    text = re.sub(r'[\U00010000-\U0010ffff]', '', text)
    # Punctuation to strip: ASCII (string.punctuation) + Bengali dari/double
    # dari + typographic ("curly") quotes. The previous version had the smart
    # quotes mangled into plain ASCII quotes (leaving a stray empty "" literal),
    # so curly quotes silently survived cleaning; spell them out explicitly.
    exclude_chars = string.punctuation + "।॥‘’“”"
    # str.translate does the whole removal in one C-level pass.
    text = text.translate(str.maketrans('', '', exclude_chars))
    # Normalize whitespace
    text = ' '.join(text.split())
    # Discard very short leftovers (a single surviving character is noise)
    if len(text.strip()) < 2:
        return ""
    return text.strip()
# --- 2. Initialize App & Model ---
# Module-level side effects: creating the FastAPI app and loading the
# classifier happen once at import time, not per request.
app = FastAPI()
# Load your model here (ensure files are uploaded if local)
# classifier = pipeline("text-classification", model="./my_model_path")
# Or if testing with a generic one:
# NOTE(review): with no model argument, transformers falls back to a default
# English sentiment checkpoint and downloads it on first use — swap in the
# Bengali model path above before deploying.
classifier = pipeline("sentiment-analysis")
# --- 3. Define Input Structure ---
class TextInput(BaseModel):
    """Request body for POST /predict: the raw text to classify."""

    # Raw, uncleaned user text; cleaning happens server-side in predict().
    text: str
# --- 4. Define Endpoints ---
@app.get("/")
def home():
    """Health check: confirms the API process is up and serving."""
    return dict(message="Bengali Model API is running")
@app.post("/predict")
def predict(input_data: TextInput):
    """Clean the submitted text, then run the classifier on the result."""
    raw = input_data.text
    cleaned = clean_text(raw)

    # Cleaning can strip the input down to nothing (pure urls/emojis/etc.);
    # report that back instead of handing an empty string to the model.
    if not cleaned:
        return {
            "error": "Input text contained only noise (urls, emojis, etc.)",
            "original_input": raw,
            "cleaned_input": ""
        }

    # The model only ever sees the cleaned text.
    return {
        "original_input": raw,
        "cleaned_input": cleaned,
        "prediction": classifier(cleaned)
    }