ex510 committed on
Commit
b610781
·
verified ·
1 Parent(s): 2f03c22

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +153 -46
main.py CHANGED
@@ -1,35 +1,74 @@
1
- from fastapi import FastAPI, HTTPException, BackgroundTasks
2
  from pydantic import BaseModel, Field
3
  from sentence_transformers import SentenceTransformer
4
  import uvicorn
5
  import asyncio
6
  from concurrent.futures import ThreadPoolExecutor
7
- from typing import List
8
  import numpy as np
9
  from contextlib import asynccontextmanager
10
  import httpx
11
  import os
 
 
 
 
 
 
12
 
13
  # Globals
14
- model = None
15
- tokenizer = None
16
  model_id = 'Qwen/Qwen3-Embedding-0.6B'
17
- executor = ThreadPoolExecutor(max_workers=4)
18
- MAX_TOKENS = 32000
 
 
 
 
 
 
 
 
 
 
19
 
20
  @asynccontextmanager
21
  async def lifespan(app: FastAPI):
22
  # Load the model and tokenizer at startup
23
  global model, tokenizer
24
- print(f"Loading model: {model_id}...")
25
- model = SentenceTransformer(model_id)
26
- tokenizer = model.tokenizer
27
- print("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  yield
29
  # (Optional) Clean up resources at shutdown
30
- print("Cleaning up resources...")
31
  model = None
32
  tokenizer = None
 
 
33
 
34
  app = FastAPI(
35
  title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)",
@@ -38,20 +77,23 @@ app = FastAPI(
38
 
39
  class TextRequest(BaseModel):
40
  text: str = Field(..., min_length=1, description="Text to embed")
 
41
  request_id: str | None = Field(None, description="Optional unique identifier for the request")
42
 
43
-
44
-
45
-
46
- async def send_to_webhook(url: str, data: dict):
47
  """Sends data to a webhook URL asynchronously."""
 
48
  try:
49
  async with httpx.AsyncClient() as client:
50
  response = await client.post(url, json=data)
51
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
52
- print(f"Successfully sent data to webhook: {url}")
53
  except httpx.RequestError as e:
54
- print(f"Error sending data to webhook {url}: {e}")
 
 
 
 
55
 
56
  @app.get("/")
57
  def home():
@@ -59,6 +101,9 @@ def home():
59
 
60
  def chunk_and_embed(text: str) -> List[float]:
61
  """Split text into chunks if too long, then pool embeddings"""
 
 
 
62
  tokens = tokenizer.encode(text, add_special_tokens=False)
63
 
64
  # If text is short, embed directly
@@ -67,19 +112,26 @@ def chunk_and_embed(text: str) -> List[float]:
67
 
68
  # Split into chunks
69
  chunks = []
70
- overlap = 50
71
  start = 0
72
  while start < len(tokens):
73
  end = start + MAX_TOKENS
74
  chunk_tokens = tokens[start:end]
 
 
 
 
 
75
  chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
76
  chunks.append(chunk_text)
77
 
78
- if end >= len(tokens):
79
  break
80
- start = end - overlap
81
 
82
  # Embed all chunks
 
 
83
  chunk_embeddings = [model.encode(chunk, normalize_embeddings=True) for chunk in chunks]
84
 
85
  # Pool embeddings (mean)
@@ -87,33 +139,88 @@ def chunk_and_embed(text: str) -> List[float]:
87
 
88
  return final_embedding
89
 
90
- @app.post("/embed/text")
91
- async def embed_text(request: TextRequest, background_tasks: BackgroundTasks):
 
 
 
 
 
 
 
 
 
 
92
  try:
93
- loop = asyncio.get_event_loop()
94
- embedding = await loop.run_in_executor(
95
- executor,
96
- lambda: chunk_and_embed(request.text)
97
- )
98
-
99
- # Check for webhook URL and add the background task
100
- webhook_url = os.environ.get("WEBHOOK_URL")
101
- if webhook_url:
102
- payload = {
103
- "text": request.text,
104
- "embedding": embedding,
105
- "request_id": request.request_id
106
- }
107
- background_tasks.add_task(send_to_webhook, webhook_url, payload)
108
-
109
- return {
110
- "success": True,
111
- "model": model_id,
112
- "dimension": len(embedding),
113
- "embedding": embedding
114
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  except Exception as e:
116
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  if __name__ == "__main__":
119
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, status
2
  from pydantic import BaseModel, Field
3
  from sentence_transformers import SentenceTransformer
4
  import uvicorn
5
  import asyncio
6
  from concurrent.futures import ThreadPoolExecutor
7
+ from typing import List, Dict, Any, Optional
8
  import numpy as np
9
  from contextlib import asynccontextmanager
10
  import httpx
11
  import os
12
+ import collections
13
+ import logging
14
+
15
# Configure logging instead of print()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Globals
model: Optional[SentenceTransformer] = None  # loaded in lifespan() at startup
tokenizer: Any = None  # tokenizer type may vary between models, so left untyped
model_id = 'Qwen/Qwen3-Embedding-0.6B'

# max_workers set to 1 to guarantee strictly sequential processing
# NOTE(review): the visible code paths use asyncio.to_thread rather than this
# executor (it is only shut down in lifespan) — confirm it is still needed.
executor = ThreadPoolExecutor(max_workers=1)

# Maximum tokens per chunk before text is split in chunk_and_embed()
MAX_TOKENS = 32000

# --- Queue-management primitives ---
request_queue = collections.deque()  # pending embedding requests (dicts with "text"/"request_id")
queue_lock = asyncio.Lock()          # guards request_queue and is_processing_queue
is_processing_queue = False          # True while the background processor is running
# --------------------------------------------------
35
 
36
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load the embedding model (and its tokenizer)
    at startup; release them and shut the executor down at shutdown.

    Raises on any load failure so the application does not start with a
    half-initialized model.
    """
    # Load the model and tokenizer at startup
    global model, tokenizer
    logger.info(f"Loading model: {model_id}...")
    try:
        model = SentenceTransformer(model_id)
        # Prefer the tokenizer already attached to the SentenceTransformer.
        if hasattr(model, 'tokenizer') and model.tokenizer is not None:
            tokenizer = model.tokenizer
        else:
            # Otherwise fall back to loading it separately via Hugging Face transformers.
            try:
                from transformers import AutoTokenizer
                tokenizer = AutoTokenizer.from_pretrained(model_id)
                logger.warning(f"Model {model_id} did not have a direct 'tokenizer' attribute. Loaded separately using AutoTokenizer.")
            except ImportError:
                logger.error("transformers library not found. Could not load tokenizer separately.")
                raise  # a tokenizer is mandatory for chunk_and_embed()
            except Exception as e:
                logger.error(f"Failed to load tokenizer separately for {model_id}: {e}", exc_info=True)
                raise

        logger.info("Model loaded successfully")
    except Exception as e:
        logger.critical(f"Failed to load model or tokenizer {model_id}: {e}", exc_info=True)
        # Re-raise so the app refuses to start when model loading fails.
        raise

    yield
    # (Optional) Clean up resources at shutdown
    logger.info("Cleaning up resources...")
    model = None
    tokenizer = None
    executor.shutdown(wait=True)  # close the executor cleanly
    logger.info("Resources cleaned up.")
72
 
73
  app = FastAPI(
74
  title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)",
 
77
 
78
class TextRequest(BaseModel):
    """Request body for POST /embed/text."""
    # Text to embed; must be non-empty.
    text: str = Field(..., min_length=1, description="Text to embed")
    # Optional correlation ID, echoed back in the response and webhook payload.
    request_id: str | None = Field(None, description="Optional unique identifier for the request")
82
 
83
async def send_to_webhook(url: str, data: Dict[str, Any]):
    """Deliver *data* as a JSON POST to the webhook at *url*.

    Delivery is best-effort: every failure mode (transport error, non-2xx
    status, anything unexpected) is logged and swallowed so a bad webhook
    can never crash the queue processor.
    """
    request_id = data.get("request_id", "N/A")
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(url, json=data)
            # Turn 4xx/5xx into HTTPStatusError so it lands in the handler below.
            response.raise_for_status()
    except httpx.RequestError as e:
        logger.error(f"Error sending data to webhook (ID: {request_id}) {url}: {e}", exc_info=True)
    except httpx.HTTPStatusError as e:
        logger.error(f"Webhook HTTP error (ID: {request_id}) {url} - Status: {e.response.status_code}, Response: {e.response.text}", exc_info=True)
    except Exception as e:
        logger.error(f"An unexpected error occurred in send_to_webhook (ID: {request_id}): {e}", exc_info=True)
    else:
        logger.info(f"Successfully sent data to webhook (ID: {request_id}): {url}")
97
 
98
  @app.get("/")
99
  def home():
 
101
 
102
  def chunk_and_embed(text: str) -> List[float]:
103
  """Split text into chunks if too long, then pool embeddings"""
104
+ if not tokenizer or not model:
105
+ raise RuntimeError("Model or tokenizer not loaded or initialized correctly.")
106
+
107
  tokens = tokenizer.encode(text, add_special_tokens=False)
108
 
109
  # If text is short, embed directly
 
112
 
113
  # Split into chunks
114
  chunks = []
115
+ overlap = 50 # Overlap tokens - يمكنك تعديلها حسب الحاجة
116
  start = 0
117
  while start < len(tokens):
118
  end = start + MAX_TOKENS
119
  chunk_tokens = tokens[start:end]
120
+
121
+ # التأكد من أننا لا نحاول فك ترميز قائمة توكنز فارغة
122
+ if not chunk_tokens:
123
+ break
124
+
125
  chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
126
  chunks.append(chunk_text)
127
 
128
+ if end >= len(tokens): # إذا وصلنا إلى نهاية النص
129
  break
130
+ start = end - overlap # للبدء من الجزء المتداخل التالي
131
 
132
  # Embed all chunks
133
+ # Note: If this list comprehension causes memory issues for very long texts,
134
+ # consider processing chunks in smaller batches or using a generator
135
  chunk_embeddings = [model.encode(chunk, normalize_embeddings=True) for chunk in chunks]
136
 
137
  # Pool embeddings (mean)
 
139
 
140
  return final_embedding
141
 
142
# --- Background queue-processor coroutine ---
async def process_queue():
    """Drain request_queue one item at a time (single-worker mode),
    embedding each text and posting the result to WEBHOOK_URL if set.
    """
    global is_processing_queue
    webhook_url = os.environ.get("WEBHOOK_URL")

    async with queue_lock:
        # If the queue is empty, or a processor is already running, do nothing.
        # NOTE(review): this guard assumes the is_processing_queue flag is
        # claimed only here. If a scheduler pre-sets the flag before adding
        # this task, the early return below fires and the queue is never
        # drained — confirm the flag is claimed in exactly one place.
        if not request_queue or is_processing_queue:
            return
        is_processing_queue = True  # mark the processor as running

    logger.info("Starting to process embedding queue (single worker mode)...")
    try:
        while True:
            async with queue_lock:
                if not request_queue:
                    logger.info("Embedding queue is empty. Stopping processor.")
                    is_processing_queue = False  # release the claim
                    break  # leave the loop once the queue is empty

                # Take the oldest item off the queue while holding the lock.
                request_item = request_queue.popleft()
                text_to_embed = request_item["text"]
                request_id_for_webhook = request_item.get("request_id", "N/A")

            logger.info(f"Processing item from queue (ID: {request_id_for_webhook})...")
            try:
                # asyncio.to_thread keeps the event loop responsive while the
                # (CPU/GPU-bound) embedding runs in a worker thread.
                embedding = await asyncio.to_thread(chunk_and_embed, text_to_embed)

                # Build the payload and deliver it to the webhook, if configured.
                if webhook_url:
                    payload = {
                        "text": text_to_embed,
                        "embedding": embedding,
                        "request_id": request_id_for_webhook
                    }
                    await send_to_webhook(webhook_url, payload)
                else:
                    logger.warning(f"WEBHOOK_URL not set. Embedding result for (ID: {request_id_for_webhook}) will not be sent to a webhook.")

                logger.info(f"Finished processing item (ID: {request_id_for_webhook}).")

            except Exception as e:
                # Per-item failures are logged but do not stop the processor.
                logger.error(f"Error processing embedding for item (ID: {request_id_for_webhook}) in queue: {e}", exc_info=True)
                # Retry / dead-letter logic could be added here.

            # Yield briefly so fast items cannot starve the event loop.
            await asyncio.sleep(0.01)

    except Exception as e:
        logger.critical(f"CRITICAL ERROR in process_queue: {e}", exc_info=True)
    finally:
        async with queue_lock:
            # Always release the claim, even after an unexpected error.
            is_processing_queue = False
199
+
200
@app.post("/embed/text", status_code=status.HTTP_202_ACCEPTED)  # 202: accepted for async processing
async def embed_text(request: TextRequest, background_tasks: BackgroundTasks):
    """Enqueue an embedding request and return 202 Accepted immediately.

    The embedding itself is computed later by the single background queue
    processor (process_queue); the result is delivered to WEBHOOK_URL (if
    configured), not in this response.

    Returns a small acknowledgement dict echoing the caller's request_id.
    """
    request_data: Dict[str, Any] = {"text": request.text}
    if request.request_id:
        request_data["request_id"] = request.request_id

    async with queue_lock:
        request_queue.append(request_data)  # enqueue under the lock
        logger.info(f"Request (ID: {request.request_id or 'N/A'}) added to queue. Queue size: {len(request_queue)}")

    # BUGFIX: always schedule a processor task and let process_queue claim
    # the is_processing_queue flag itself. The previous code set the flag to
    # True here *before* scheduling, which made process_queue's
    # "already running" guard bail out immediately — the flag was never
    # reset, so no queued request was ever processed. Duplicate tasks are
    # harmless: process_queue exits at once when another processor holds
    # the claim.
    background_tasks.add_task(process_queue)
    logger.info("Started background queue processor.")

    # Fast acknowledgement so the client knows the request was accepted.
    return {
        "success": True,
        "message": "Request received and added to queue for processing.",
        "request_id": request.request_id
    }
224
 
225
if __name__ == "__main__":
    # Run the API directly with uvicorn when executed as a script.
    uvicorn.run(app, host="0.0.0.0", port=7860)