Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from fastapi import FastAPI, HTTPException, Query | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| import logging | |
| import time | |
| import os | |
| import sys | |
| import traceback | |
| from typing import Optional | |
| import psutil | |
# Logging configuration: send all log output to stdout so the hosting
# platform (e.g. a Hugging Face Space) captures it in the container logs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
# ============================================
# Hugging Face model configuration
# ============================================
# Model pulled directly from the Hugging Face Hub at startup.
MODEL_NAME = "Qwen/Qwen3-0.6B"  # this model id exists on Hugging Face

# ============================================
# FastAPI app
# ============================================
app = FastAPI(
    title="Qwen3-0.6B API from Hugging Face",
    description="Hugging Face থেকে সরাসরি লোড করা Qwen3-0.6B মডেল",
    version="1.0.0"
)

# CORS middleware: wide-open (any origin/method/header) so the API is
# callable from browsers on other domains.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global state shared between the loader, the API endpoints and the Gradio UI.
model = None          # AutoModelForCausalLM instance once loaded
tokenizer = None      # AutoTokenizer instance once loaded
model_loaded = False  # True after a successful load
load_error = None     # message from the last failed load attempt, if any
load_time = None      # seconds taken by the successful load
# Pydantic models
class GenerationRequest(BaseModel):
    """Request body for text generation."""
    prompt: str                          # user prompt to complete
    max_length: Optional[int] = 200      # maximum number of new tokens
    temperature: Optional[float] = 0.7   # sampling temperature
class GenerationResponse(BaseModel):
    """Response payload produced by the generate endpoint."""
    success: bool            # True when generation completed
    prompt: str              # the prompt that was submitted
    generated_text: str      # newly generated text (prompt stripped)
    model_name: str          # model id used for generation
    inference_time: float    # wall-clock seconds for the generation call
    device_used: str         # always "cpu" in this deployment
    model_source: str = "Hugging Face"
# ============================================
# Model loading from Hugging Face
# ============================================
def load_model_from_huggingface():
    """Load the tokenizer and model directly from the Hugging Face Hub.

    Populates the module globals ``model``, ``tokenizer``, ``model_loaded``,
    ``load_error`` and ``load_time``, then runs a tiny smoke-test generation.
    Returns True on success, False on failure; failures are logged and
    recorded in ``load_error`` rather than raised.
    """
    global model, tokenizer, model_loaded, load_error, load_time
    try:
        start_load = time.time()
        logger.info("=" * 60)
        logger.info("🤗 Hugging Face থেকে মডেল লোড করা শুরু")
        logger.info("=" * 60)
        logger.info(f"মডেল: {MODEL_NAME}")
        logger.info(f"প্ল্যাটফর্ম: Hugging Face Hub")

        # Log system info: Python/PyTorch versions, CPU count, total RAM.
        logger.info(f"পাইথন: {sys.version}")
        logger.info(f"PyTorch: {torch.__version__}")
        logger.info(f"CPU Cores: {psutil.cpu_count()}")
        memory = psutil.virtual_memory()
        logger.info(f"RAM: {memory.total / (1024**3):.2f} GB")

        # Step 1/3: fetch and load the tokenizer from the Hub.
        logger.info("🔗 Hugging Face এ কানেক্ট করা হচ্ছে...")
        logger.info("📝 Step 1/3: টোকেনাইজার লোড করা হচ্ছে...")
        logger.info(f"হাগিং ফেস থেকে ফেচ: {MODEL_NAME}")
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            use_fast=True
        )
        logger.info("✅ টোকেনাইজার লোড হয়েছে")

        # Ensure a padding token exists (some tokenizers define none);
        # reuse the EOS token, which generate() also falls back on.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            logger.info("✅ Pad token সেট করা হয়েছে")

        # Step 2/3: download/load the model weights, CPU-optimised:
        # fp32 (no GPU) and low_cpu_mem_usage to reduce peak RAM during load.
        logger.info("🤖 Step 2/3: মডেল লোড করা হচ্ছে...")
        logger.info(f"হাগিং ফেস থেকে ডাউনলোড: {MODEL_NAME}")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            use_cache=True
        )
        logger.info("✅ মডেল ডাউনলোড সম্পন্ন")

        # Step 3/3: switch to inference mode (model remains on CPU).
        logger.info("💻 Step 3/3: মডেল CPU-তে সেট করা হচ্ছে...")
        model.eval()
        logger.info("✅ মডেল ready")

        # Record how long the whole load took.
        load_time = time.time() - start_load
        logger.info(f"✅ মডেল সফলভাবে লোড হয়েছে!")
        logger.info(f"⏱ লোড টাইম: {load_time:.2f} সেকেন্ড")
        logger.info(f"📊 মডেল সাইজ: {model.num_parameters():,} প্যারামিটার")

        # Smoke test: generate a few tokens to verify the full pipeline.
        logger.info("🧪 টেস্ট ইনফারেন্স চলছে...")
        test_prompt = "Hello"
        inputs = tokenizer(test_prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=5,
                do_sample=False
            )
        test_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"✅ টেস্ট সফল: '{test_prompt}' -> '{test_output}'")
        model_loaded = True
        return True
    except Exception as e:
        load_error = str(e)
        logger.error(f"❌ Hugging Face থেকে মডেল লোড করতে সমস্যা: {str(e)}")
        logger.error(traceback.format_exc())
        # Best-effort diagnosis based on the error text.
        if "404" in str(e):
            logger.error("মডেলটি Hugging Face এ খুঁজে পাওয়া যায়নি। নাম চেক করুন: Qwen/Qwen3-0.6B")
        elif "connection" in str(e).lower():
            logger.error("Hugging Face এ কানেক্ট করা যাচ্ছে না। ইন্টারনেট চেক করুন")
        elif "memory" in str(e).lower():
            logger.error("RAM কম! মডেল লোড করার জন্য পর্যাপ্ত মেমোরি নেই")
        return False
# Startup event: load the model once when the server boots.
# NOTE(review): the @app.on_event("startup") decorator appears to have been
# lost when this file was exported — without it FastAPI never calls this
# hook and the model is never loaded. Restored here.
@app.on_event("startup")
async def startup_event():
    """Load the Hugging Face model once at application startup."""
    success = load_model_from_huggingface()
    if success:
        logger.info("🚀 সার্ভার চালু হয়েছে - Hugging Face মডেল ready")
    else:
        # Server still comes up so /health can report the load error.
        logger.error("❌ সার্ভার চালু হয়েছে কিন্তু মডেল লোড হয়নি")
# ============================================
# API endpoints
# ============================================
# NOTE(review): the route decorator was stripped from this file on export;
# restored with the /health path documented in the UI help text below.
@app.get("/health")
async def health_check():
    """Health check: report model status and basic system resources.

    Returns "healthy"/"unhealthy" plus model metadata, CPU/RAM figures and,
    when the model failed to load, the recorded error message.
    """
    memory = psutil.virtual_memory()
    return {
        "status": "healthy" if model_loaded else "unhealthy",
        "model": {
            "name": MODEL_NAME,
            "loaded": model_loaded,
            "source": "Hugging Face",
            "load_time": load_time,
            # Identity check instead of truthiness: nn.Module truthiness is
            # not a defined contract, `is not None` is.
            "parameters": model.num_parameters() if model is not None else None
        },
        "system": {
            "device": "cpu",
            "cpu_cores": psutil.cpu_count(),
            "ram_total_gb": round(memory.total / (1024**3), 2),
            "ram_available_gb": round(memory.available / (1024**3), 2),
            "ram_used_percent": memory.percent
        },
        "error": load_error if not model_loaded else None,
        "timestamp": time.time()
    }
# NOTE(review): the route decorator was stripped from this file on export;
# restored with the /generate path documented in the UI help text below.
@app.get("/generate", response_model=GenerationResponse)
async def generate_text_get(
    prompt: str = Query(..., description="আপনার প্রশ্ন/প্রম্পট"),
    max_length: Optional[int] = Query(200, description="জেনারেট করার সর্বোচ্চ দৈর্ঘ্য"),
    temperature: Optional[float] = Query(0.7, description="টেম্পারেচার (0-1)")
):
    """Generate text with the Hugging Face model (GET method).

    Raises HTTP 503 while the model is not loaded, HTTP 500 if generation
    itself fails. Returns a GenerationResponse with the new text only
    (the echoed prompt is stripped).
    """
    if not model_loaded or model is None or tokenizer is None:
        error_msg = load_error if load_error else "মডেল লোড হয়নি"
        raise HTTPException(status_code=503, detail=f"Hugging Face মডেল লোড হয়নি: {error_msg}")
    try:
        start_time = time.time()
        logger.info(f"📥 প্রম্পট: {prompt[:100]}...")

        # Tokenize; long prompts are truncated to 512 tokens.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512
        )
        input_tokens = inputs['input_ids'].shape[1]
        logger.info(f"ইনপুট টোকেন: {input_tokens}")

        # Greedy decoding when temperature <= 0, nucleus sampling otherwise.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                do_sample=temperature > 0,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode and strip the echoed prompt so only new text is returned.
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):].strip()
        output_tokens = outputs.shape[1] - input_tokens
        inference_time = time.time() - start_time
        logger.info(f"✅ জেনারেটেড: {len(generated_text)} chars, {output_tokens} tokens, {inference_time:.2f}s")
        return GenerationResponse(
            success=True,
            prompt=prompt,
            generated_text=generated_text,
            model_name=MODEL_NAME,
            inference_time=round(inference_time, 3),
            device_used="cpu",
            model_source="Hugging Face"
        )
    except Exception as e:
        logger.error(f"জেনারেশন এরর: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
# ============================================
# Gradio UI (for the Hugging Face Space)
# ============================================
def generate_with_gradio(prompt, max_length, temperature):
    """Generation callback wired to the Gradio UI.

    Returns a (text, seconds) pair; on failure the text holds an error
    message and the time is 0.
    """
    if not model_loaded:
        return f"❌ মডেল লোড হয়নি: {load_error}", 0
    try:
        t0 = time.time()
        encoded = tokenizer(prompt, return_tensors="pt")
        sampling = temperature > 0  # greedy decode when temperature <= 0
        with torch.no_grad():
            result_ids = model.generate(
                **encoded,
                max_new_tokens=int(max_length),
                temperature=float(temperature),
                do_sample=sampling,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
        text = tokenizer.decode(result_ids[0], skip_special_tokens=True)
        # Drop the echoed prompt, keeping only the newly generated tail.
        if text.startswith(prompt):
            text = text[len(prompt):].strip()
        elapsed = time.time() - t0
        return text, round(elapsed, 3)
    except Exception as e:
        return f"❌ Error: {str(e)}", 0
# Build the Gradio UI
with gr.Blocks(title="Qwen3-0.6B from Hugging Face", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
# 🤗 Hugging Face মডেল: {MODEL_NAME}
এই মডেলটি সরাসরি Hugging Face থেকে লোড করা হয়েছে।
""")
    with gr.Row():
        with gr.Column(scale=2):
            # Input side: prompt box, generation parameters, trigger button.
            prompt_input = gr.Textbox(
                label="আপনার প্রম্পট",
                placeholder="এখানে লিখুন...",
                lines=3
            )
            with gr.Row():
                max_length_slider = gr.Slider(50, 500, value=200, label="Max Length")
                temp_slider = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
            generate_btn = gr.Button("🚀 Generate", variant="primary")
            # Model load status (evaluated once, at UI build time).
            model_status = gr.Label(
                value=f"✅ {MODEL_NAME} লোড হয়েছে" if model_loaded else f"❌ মডেল লোড হয়নি",
                label="মডেল স্ট্যাটাস"
            )
        with gr.Column(scale=2):
            # Output side: generated text and timing.
            output_text = gr.Textbox(label="Generated Text", lines=10)
            inference_time = gr.Number(label="Time (seconds)")
    generate_btn.click(
        fn=generate_with_gradio,
        inputs=[prompt_input, max_length_slider, temp_slider],
        outputs=[output_text, inference_time]
    )
    gr.Markdown(f"""
## 🌐 API Endpoints
**মডেল সোর্স:** Hugging Face 🤗
**মডেল নাম:** {MODEL_NAME}
### GET Request:
```
/generate?prompt=Hello&max_length=100&temperature=0.7
```
### Response:
```json
{{
"success": true,
"prompt": "Hello",
"generated_text": "World",
"model_name": "Qwen/Qwen3-0.6B",
"model_source": "Hugging Face",
"inference_time": 0.5
}}
```
### Health Check:
```
/health
```
""")

# Mount the Gradio UI onto the FastAPI app at "/" (Hugging Face Space entry).
gr.mount_gradio_app(app, demo, path="/")
# ============================================
# Root endpoint
# ============================================
# NOTE(review): no @app.get("/") decorator is present, so this handler is
# never registered. "/" is also occupied by the Gradio mount, so simply
# restoring @app.get("/") would be shadowed — confirm the intended route
# before wiring it up.
async def root():
    """API root: service metadata and a map of the available endpoints."""
    return {
        "name": "Qwen3-0.6B API",
        "source": "Hugging Face 🤗",
        "model": MODEL_NAME,
        "status": "loaded" if model_loaded else f"error: {load_error}",
        "endpoints": {
            "GET /health": "Health check",
            "GET /generate?prompt=Hello": "Generate text",
            "GET /": "This info",
            "UI": "/ (Gradio UI)"
        },
        "docs": "/docs"
    }
# NOTE(review): the route decorator for this endpoint appears to be missing
# (likely lost on export) and the intended path is not recoverable from this
# file — confirm and restore it so the handler is registered with `app`.
async def model_info():
    """Detailed information about the loaded model and tokenizer.

    Raises HTTP 503 until both the model and the tokenizer are loaded.
    """
    # Fix: the original guarded only `model` (via truthiness) yet also
    # dereferenced `tokenizer` below — guard both, with identity checks.
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="মডেল লোড হয়নি")
    return {
        "model_name": MODEL_NAME,
        "source": "Hugging Face",
        "parameters": model.num_parameters(),
        "device": "cpu",
        "tokenizer": tokenizer.__class__.__name__,
        "vocab_size": tokenizer.vocab_size,
        "hf_link": f"https://huggingface.co/{MODEL_NAME}"
    }