Spaces:
Sleeping
Sleeping
| import re | |
| import string | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import pipeline | |
| # --- 1. Define the Cleaning Function (No Pandas needed) --- | |
# Patterns are compiled once at import time so each call only pays for matching.
_URL_RE = re.compile(r'http\S+|www\S+')
_EMAIL_RE = re.compile(r'\S+@\S+')
# Python's \w does not match combining marks, so Bengali vowel signs
# (e.g. U+09BE) would end a hashtag/mention after its first consonant and
# leave the rest of the word behind. The Bengali block and the zero-width
# (non-)joiners are therefore listed explicitly.
_TAG_RE = re.compile(r'[@#][\w\u0980-\u09ff\u200c\u200d]+')
# Astral-plane symbols plus the BMP "Miscellaneous Symbols"/"Dingbats" ranges
# and the variation selectors that trail emoji such as U+2764 U+FE0F.
_EMOJI_RE = re.compile(r'[\U00010000-\U0010ffff\u2600-\u27bf\ufe00-\ufe0f]')
# ASCII punctuation, danda (U+0964), double danda (U+0965) and the
# typographic single/double quotes.
_PUNCT_TABLE = str.maketrans(
    '', '', string.punctuation + "\u0964\u0965\u2018\u2019\u201c\u201d"
)


def clean_text(text: str) -> str:
    """Strip noise from a piece of (Bengali) text before classification.

    Removes, in order: URLs, e-mail addresses, @mentions and #hashtags,
    emoji, and punctuation (ASCII, danda/double danda, curly quotes); then
    collapses every run of whitespace into a single space.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text, or ``""`` when the input is empty, is not a
        string, or fewer than two characters remain after cleaning.
    """
    if not text or not isinstance(text, str):
        return ""

    text = _URL_RE.sub('', text)
    # E-mails must go before mentions, otherwise "@domain" would be eaten
    # first and the local part of the address would be left behind.
    text = _EMAIL_RE.sub('', text)
    text = _TAG_RE.sub('', text)
    text = _EMOJI_RE.sub('', text)
    text = text.translate(_PUNCT_TABLE)

    # split()/join also trims both ends, so no further strip() is needed.
    text = ' '.join(text.split())

    # Anything shorter than two characters is treated as noise.
    return text if len(text) >= 2 else ""
| # --- 2. Initialize App & Model --- | |
# FastAPI application object for this service.
app = FastAPI()

# The inference pipeline is built once, at import time, and reused afterwards.
# To serve a locally uploaded checkpoint instead, replace the call below with:
#     pipeline("text-classification", model="./my_model_path")
# NOTE(review): no model is named here, so transformers falls back to its
# default checkpoint for the task - confirm that is suitable for Bengali input.
classifier = pipeline(task="sentiment-analysis")
| # --- 3. Define Input Structure --- | |
class TextInput(BaseModel):
    # Request body schema: a single required string field holding the raw,
    # uncleaned text to classify.
    text: str
| # --- 4. Define Endpoints --- | |
# Without a route decorator the function is never registered on `app`,
# so the service would expose no root endpoint.
@app.get("/")
def home():
    """Liveness endpoint: return a static message showing the API is up."""
    return {"message": "Bengali Model API is running"}
# Registered as a POST route so FastAPI parses the JSON body into TextInput;
# without the decorator the function is never exposed by `app`.
@app.post("/predict")
def predict(input_data: TextInput):
    """Clean the submitted text and classify it.

    Args:
        input_data: Request body carrying the raw text in ``text``.

    Returns:
        A dict with ``original_input``, ``cleaned_input`` and ``prediction``
        (the classifier's output for the cleaned text). When cleaning leaves
        nothing usable, a dict with ``error``, ``original_input`` and an
        empty ``cleaned_input`` is returned instead and the model is not
        called.
    """
    # STEP 1: Clean the input
    cleaned_text = clean_text(input_data.text)

    # STEP 2: Nothing left after cleaning - report it rather than feeding
    # an empty string to the model.
    if not cleaned_text:
        return {
            "error": "Input text contained only noise (urls, emojis, etc.)",
            "original_input": input_data.text,
            "cleaned_input": ""
        }

    # STEP 3: Pass CLEANED text to the model. truncation=True keeps inputs
    # longer than the model's maximum sequence length from raising.
    prediction = classifier(cleaned_text, truncation=True)

    return {
        "original_input": input_data.text,
        "cleaned_input": cleaned_text,
        "prediction": prediction
    }