ex510 committed on
Commit
fcc9024
·
verified ·
1 Parent(s): 81a215d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -167
main.py CHANGED
@@ -1,77 +1,35 @@
1
- from fastapi import FastAPI, HTTPException, BackgroundTasks, status
2
  from pydantic import BaseModel, Field
3
  from sentence_transformers import SentenceTransformer
4
  import uvicorn
5
  import asyncio
6
  from concurrent.futures import ThreadPoolExecutor
7
- from typing import List, Dict, Any, Optional
8
  import numpy as np
9
  from contextlib import asynccontextmanager
10
  import httpx
11
  import os
12
- import collections
13
- import logging
14
- import nest_asyncio
15
- import threading
16
- import time
17
-
18
- # تهيئة التسجيل (Logging) بدلاً من print
19
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
- logger = logging.getLogger(__name__)
21
 
22
  # Globals
23
- model: Optional[SentenceTransformer] = None
24
- tokenizer: Any = None # قد لا يكون نوعه واضحًا دائمًا لجميع النماذج
25
  model_id = 'Qwen/Qwen3-Embedding-0.6B'
26
-
27
- # تم تعديل max_workers إلى 1 لضمان معالجة تسلسلية
28
- executor = ThreadPoolExecutor(max_workers=1)
29
-
30
- # تم تحديث الحد الأقصى للتوكنز
31
  MAX_TOKENS = 32000
32
 
33
- # --- إضافة عناصر جديدة لإدارة قائمة الانتظار ---
34
- request_queue = collections.deque() # قائمة انتظار لتخزين الطلبات
35
- queue_lock = asyncio.Lock() # قفل لضمان الوصول الآمن لقائمة الانتظار
36
- is_processing_queue = False # مؤشر لمعرفة ما إذا كان المعالج يعمل حاليًا
37
- # --------------------------------------------------
38
-
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
  # Load the model and tokenizer at startup
42
  global model, tokenizer
43
- logger.info(f"Loading model: {model_id}...")
44
- try:
45
- model = SentenceTransformer(model_id)
46
- # محاولة الوصول إلى الـ tokenizer
47
- if hasattr(model, 'tokenizer') and model.tokenizer is not None:
48
- tokenizer = model.tokenizer
49
- else:
50
- # إذا لم يكن متاحًا مباشرة، حاول تحميله بشكل منفصل باستخدام Hugging Face transformers
51
- try:
52
- from transformers import AutoTokenizer
53
- tokenizer = AutoTokenizer.from_pretrained(model_id)
54
- logger.warning(f"Model {model_id} did not have a direct 'tokenizer' attribute. Loaded separately using AutoTokenizer.")
55
- except ImportError:
56
- logger.error("transformers library not found. Could not load tokenizer separately.")
57
- raise # يجب أن يكون tokenizer موجودًا
58
- except Exception as e:
59
- logger.error(f"Failed to load tokenizer separately for {model_id}: {e}", exc_info=True)
60
- raise
61
-
62
- logger.info("Model loaded successfully")
63
- except Exception as e:
64
- logger.critical(f"Failed to load model or tokenizer {model_id}: {e}", exc_info=True)
65
- # رفع الاستثناء لضمان عدم بدء التطبيق إذا فشل تحميل النموذج
66
- raise
67
-
68
  yield
69
  # (Optional) Clean up resources at shutdown
70
- logger.info("Cleaning up resources...")
71
  model = None
72
  tokenizer = None
73
- executor.shutdown(wait=True) # إغلاق الـ executor بشكل صحيح
74
- logger.info("Resources cleaned up.")
75
 
76
  app = FastAPI(
77
  title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)",
@@ -80,23 +38,20 @@ app = FastAPI(
80
 
81
  class TextRequest(BaseModel):
82
  text: str = Field(..., min_length=1, description="Text to embed")
83
- # إضافة حقل request_id اختياري
84
  request_id: str | None = Field(None, description="Optional unique identifier for the request")
85
 
86
- async def send_to_webhook(url: str, data: Dict[str, Any]):
 
 
 
87
  """Sends data to a webhook URL asynchronously."""
88
- request_id = data.get("request_id", "N/A")
89
  try:
90
  async with httpx.AsyncClient() as client:
91
  response = await client.post(url, json=data)
92
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
93
- logger.info(f"Successfully sent data to webhook (ID: {request_id}): {url}")
94
  except httpx.RequestError as e:
95
- logger.error(f"Error sending data to webhook (ID: {request_id}) {url}: {e}", exc_info=True)
96
- except httpx.HTTPStatusError as e:
97
- logger.error(f"Webhook HTTP error (ID: {request_id}) {url} - Status: {e.response.status_code}, Response: {e.response.text}", exc_info=True)
98
- except Exception as e:
99
- logger.error(f"An unexpected error occurred in send_to_webhook (ID: {request_id}): {e}", exc_info=True)
100
 
101
  @app.get("/")
102
  def home():
@@ -104,131 +59,61 @@ def home():
104
 
105
  def chunk_and_embed(text: str) -> List[float]:
106
  """Split text into chunks if too long, then pool embeddings"""
107
- if not tokenizer or not model:
108
- raise RuntimeError("Model or tokenizer not loaded or initialized correctly.")
109
-
110
  tokens = tokenizer.encode(text, add_special_tokens=False)
111
-
112
  # If text is short, embed directly
113
  if len(tokens) <= MAX_TOKENS:
114
  return model.encode(text, normalize_embeddings=True).tolist()
115
-
116
  # Split into chunks
117
  chunks = []
118
- overlap = 50 # Overlap tokens - يمكنك تعديلها حسب الحاجة
119
  start = 0
120
  while start < len(tokens):
121
  end = start + MAX_TOKENS
122
  chunk_tokens = tokens[start:end]
123
-
124
- # التأكد من أننا لا نحاول فك ترميز قائمة توكنز فارغة
125
- if not chunk_tokens:
126
- break
127
-
128
  chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
129
  chunks.append(chunk_text)
130
-
131
- if end >= len(tokens): # إذا وصلنا إلى نهاية النص
132
  break
133
- start = end - overlap # للبدء من الجزء المتداخل التالي
134
-
135
  # Embed all chunks
136
- # Note: If this list comprehension causes memory issues for very long texts,
137
- # consider processing chunks in smaller batches or using a generator
138
  chunk_embeddings = [model.encode(chunk, normalize_embeddings=True) for chunk in chunks]
139
-
140
  # Pool embeddings (mean)
141
  final_embedding = np.mean(chunk_embeddings, axis=0).tolist()
142
-
143
  return final_embedding
144
 
145
- # --- دالة المعالج الجديدة ---
146
- async def process_queue():
147
- global is_processing_queue
148
- webhook_url = os.environ.get("WEBHOOK_URL")
149
-
150
- async with queue_lock:
151
- # إذا لم تكن هناك عناصر في قائمة الانتظار، أو كان المعالج يعمل بالفعل، لا تفعل شيئًا
152
- if not request_queue or is_processing_queue:
153
- return
154
- is_processing_queue = True # تعيين المؤشر إلى True للإشارة إلى أن المعالج يعمل
155
-
156
- logger.info("Starting to process embedding queue (single worker mode)...")
157
  try:
158
- while True:
159
- async with queue_lock:
160
- if not request_queue:
161
- logger.info("Embedding queue is empty. Stopping processor.")
162
- is_processing_queue = False # إعادة تعيين المؤشر
163
- break # الخروج من الحلقة عند فراغ قائمة الانتظار
164
-
165
- # استخراج العنصر الأول من قائمة الانتظار
166
- request_item = request_queue.popleft()
167
- text_to_embed = request_item["text"]
168
- request_id_for_webhook = request_item.get("request_id", "N/A")
169
-
170
- logger.info(f"Processing item from queue (ID: {request_id_for_webhook})... ")
171
- try:
172
- embedding = await asyncio.to_thread(chunk_and_embed, text_to_embed)
173
- logger.info(f"Embedding successful for item (ID: {request_id_for_webhook}). Checking webhook configuration...")
174
-
175
- if webhook_url:
176
- payload = {
177
- "text": text_to_embed,
178
- "embedding": embedding,
179
- "request_id": request_id_for_webhook
180
- }
181
- await send_to_webhook(webhook_url, payload)
182
- else:
183
- logger.warning(f"WEBHOOK_URL not set. Embedding result for (ID: {request_id_for_webhook}) will not be sent to a webhook.")
184
-
185
- logger.info(f"Finished processing item (ID: {request_id_for_webhook}).")
186
-
187
- except Exception as e:
188
- logger.error(f"Error during chunk_and_embed for item (ID: {request_id_for_webhook}) in queue: {e}", exc_info=True)
189
-
190
- # السماح بالتأجيل قليلًا لمنع حظر الـ event loop بالكامل إذا كانت المعالجة سريعة جدًا
191
- await asyncio.sleep(0.01)
192
-
193
  except Exception as e:
194
- logger.critical(f"CRITICAL ERROR in process_queue: {e}", exc_info=True)
195
- finally:
196
- async with queue_lock:
197
- # التأكد من إعادة تعيين المؤشر حتى لو حدث خطأ
198
- is_processing_queue = False
199
-
200
- @app.post("/embed/text", status_code=status.HTTP_202_ACCEPTED) # تغيير حالة الاستجابة إلى 202 Accepted
201
- async def embed_text(request: TextRequest, background_tasks: BackgroundTasks):
202
- global is_processing_queue
203
-
204
- request_data = {"text": request.text}
205
- if request.request_id:
206
- request_data["request_id"] = request.request_id
207
-
208
- async with queue_lock:
209
- request_queue.append(request_data) # إضافة الطلب إلى قائمة الانتظار
210
- logger.info(f"Request (ID: {request.request_id or 'N/A'}) added to queue. Queue size: {len(request_queue)}")
211
-
212
- # إذا لم يكن هناك معالج يعمل حاليًا، ابدأ واحدًا في الخلفية
213
- if not is_processing_queue:
214
- is_processing_queue = True # تعيين المؤشر لمنع بدء معالجات متعددة
215
- background_tasks.add_task(process_queue)
216
- logger.info("Started background queue processor.")
217
-
218
- # إرجاع استجابة سريعة للعميل لإعلامه بأن الطلب تم استلامه ومعالجته لاحقًا
219
- return {
220
- "success": True,
221
- "message": "Request received and added to queue for processing.",
222
- "request_id": request.request_id # إرجاع الـ ID للعميل
223
- }
224
-
225
- def run_uvicorn():
226
- nest_asyncio.apply()
227
- uvicorn.run(app, host="0.0.0.0", port=7860)
228
-
229
- # Start Uvicorn in a new thread
230
- uvicorn_thread = threading.Thread(target=run_uvicorn)
231
- uvicorn_thread.start()
232
 
233
- # Optional: Add a small delay to allow Uvicorn to start up
234
- time.sleep(1)
 
1
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
2
  from pydantic import BaseModel, Field
3
  from sentence_transformers import SentenceTransformer
4
  import uvicorn
5
  import asyncio
6
  from concurrent.futures import ThreadPoolExecutor
7
+ from typing import List
8
  import numpy as np
9
  from contextlib import asynccontextmanager
10
  import httpx
11
  import os
 
 
 
 
 
 
 
 
 
12
 
13
  # Globals
14
+ model = None
15
+ tokenizer = None
16
  model_id = 'Qwen/Qwen3-Embedding-0.6B'
17
+ executor = ThreadPoolExecutor(max_workers=4)
 
 
 
 
18
  MAX_TOKENS = 32000
19
 
 
 
 
 
 
 
20
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler.

    Loads the SentenceTransformer model (and grabs its tokenizer) once at
    startup, and releases resources at shutdown.
    """
    # Load the model and tokenizer at startup
    global model, tokenizer
    print(f"Loading model: {model_id}...")
    model = SentenceTransformer(model_id)
    tokenizer = model.tokenizer
    print("Model loaded successfully")
    yield
    # (Optional) Clean up resources at shutdown
    print("Cleaning up resources...")
    model = None
    tokenizer = None
    # Shut down the worker pool so threads do not outlive the app
    # (the module-level executor was otherwise never closed).
    executor.shutdown(wait=True)
 
 
33
 
34
  app = FastAPI(
35
  title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)",
 
38
 
39
class TextRequest(BaseModel):
    """Request body for the /embed/text endpoint."""
    # Text to embed; must be non-empty.
    text: str = Field(..., min_length=1, description="Text to embed")
    # Optional caller-supplied ID, echoed back in the webhook payload.
    request_id: str | None = Field(None, description="Optional unique identifier for the request")
42
 
43
+
44
+
45
+
46
async def send_to_webhook(url: str, data: dict):
    """Sends data to a webhook URL asynchronously.

    Args:
        url: Destination webhook endpoint.
        data: JSON-serializable payload to POST.

    Errors are logged, never raised: this runs as a background task and
    nothing upstream can handle an exception from it.
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(url, json=data)
            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
            print(f"Successfully sent data to webhook: {url}")
    except httpx.HTTPStatusError as e:
        # BUG FIX: raise_for_status() raises HTTPStatusError, which is NOT a
        # subclass of RequestError -- without this handler any 4xx/5xx webhook
        # response escaped the background task unhandled.
        print(f"Webhook HTTP error {url} - status: {e.response.status_code}")
    except httpx.RequestError as e:
        print(f"Error sending data to webhook {url}: {e}")
 
 
 
 
55
 
56
  @app.get("/")
57
  def home():
 
59
 
60
def chunk_and_embed(text: str) -> List[float]:
    """Split text into overlapping token chunks if too long, then pool embeddings.

    Args:
        text: Input text to embed.

    Returns:
        A single embedding vector (list of floats). Texts longer than
        MAX_TOKENS are split into overlapping chunks whose embeddings are
        mean-pooled.

    Raises:
        RuntimeError: if the model/tokenizer have not been loaded yet.
    """
    if model is None or tokenizer is None:
        # Guard against being called before lifespan() finished loading.
        raise RuntimeError("Model or tokenizer not loaded")

    tokens = tokenizer.encode(text, add_special_tokens=False)

    # If text is short, embed directly
    if len(tokens) <= MAX_TOKENS:
        return model.encode(text, normalize_embeddings=True).tolist()

    # Split into chunks. The overlap keeps some context across chunk
    # boundaries; clamp it below MAX_TOKENS so `start` always advances
    # (an overlap >= chunk size would loop forever).
    overlap = min(50, MAX_TOKENS // 2)
    chunks = []
    start = 0
    while start < len(tokens):
        end = start + MAX_TOKENS
        chunk_text = tokenizer.decode(tokens[start:end], skip_special_tokens=True)
        chunks.append(chunk_text)
        if end >= len(tokens):
            break
        start = end - overlap

    # Embed all chunks
    chunk_embeddings = [model.encode(chunk, normalize_embeddings=True) for chunk in chunks]

    # Pool embeddings (mean)
    return np.mean(chunk_embeddings, axis=0).tolist()
89
 
90
@app.post("/embed/text")
async def embed_text(request: TextRequest, background_tasks: BackgroundTasks):
    """Embed the request text and return the vector.

    If the WEBHOOK_URL environment variable is set, the result is also POSTed
    to that URL as a background task after the response is sent.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        # Run the CPU-bound embedding off the event loop thread.
        # get_event_loop() is deprecated inside coroutines; use the running loop.
        loop = asyncio.get_running_loop()
        embedding = await loop.run_in_executor(executor, chunk_and_embed, request.text)

        # Check for webhook URL and add the background task
        webhook_url = os.environ.get("WEBHOOK_URL")
        if webhook_url:
            payload = {
                "text": request.text,
                "embedding": embedding,
                "request_id": request.request_id,
            }
            background_tasks.add_task(send_to_webhook, webhook_url, payload)

        return {
            "success": True,
            "model": model_id,
            "dimension": len(embedding),
            "embedding": embedding,
        }
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
if __name__ == "__main__":
    # Launch the API with Uvicorn when run as a script, on all interfaces,
    # port 7860 (presumably the port the hosting platform expects -- confirm).
    uvicorn.run(app, host="0.0.0.0", port=7860)