Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
-import
 import torch
 import numpy as np
 from transformers import AutoTokenizer, AutoModel
@@ -7,6 +8,7 @@ import json
 import logging
 import os
 import time

 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -193,160 +195,111 @@ def health_check():
     """Health check endpoint"""
     return {"status": "healthy", "model_loaded": model is not None}

-# Create
-
-"
-
-
-
-
-
-
-
         }
-
-
-
-
-

-
-    The API supports both single text and batch processing.
-    """)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-

-
-
-
-        with gr.Row():
-            with gr.Column():
-                batch_text_input = gr.Textbox(
-                    label="Input Texts (one per line)",
-                    placeholder="Enter multiple texts, one per line...",
-                    lines=5
-                )
-                batch_btn = gr.Button("Generate Embeddings", variant="primary")
-
-            with gr.Column():
-                batch_output = gr.Textbox(
-                    label="Embeddings (JSON)",
-                    lines=10,
-                    interactive=False
-                )
-
-        batch_btn.click(
-            batch_embedding_interface,
-            inputs=[batch_text_input],
-            outputs=[batch_output]
-        )

-
-
-
-        with gr.Row():
-            with gr.Column():
-                emb1_input = gr.Textbox(
-                    label="Embedding 1 (JSON)",
-                    placeholder='["0.1", "0.2", ...]',
-                    lines=3
-                )
-                emb2_input = gr.Textbox(
-                    label="Embedding 2 (JSON)",
-                    placeholder='["0.1", "0.2", ...]',
-                    lines=3
-                )
-                sim_btn = gr.Button("Compute Similarity", variant="primary")
-
-            with gr.Column():
-                similarity_output = gr.Number(
-                    label="Cosine Similarity",
-                    precision=4
-                )
-
-        sim_btn.click(
-            similarity_interface,
-            inputs=[emb1_input, emb2_input],
-            outputs=[similarity_output]
-        )

-
-
-
-
-
-
-
-    ```json
-    {
-        "data": ["Your text here"]
-    }
-    ```
-
-    ### 2. Batch Text Embedding
-    **POST** `/api/predict`
-
-    ```json
-    {
-        "data": [["Text 1", "Text 2", "Text 3"]]
-    }
-    ```
-
-    ### 3. Health Check
-    **GET** `/health`
-
-    Returns: `{"status": "healthy", "model_loaded": true}`
-
-    ## Response Format
-
-    All endpoints return embeddings as JSON arrays of floating-point numbers.
-    """)
-
-    return interface

 def main():
     """Main function to run the application"""
-    logger.info("Starting

     # Load model
     if not load_model():
         logger.error("Failed to load model. Exiting...")
         return

-
-    interface = create_interface()

-    #
-
-
-
-
-
-        quiet=False
     )

 if __name__ == "__main__":

@@ -1,4 +1,5 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 import torch
 import numpy as np
 from transformers import AutoTokenizer, AutoModel

@@ -7,6 +8,7 @@ import json
 import logging
 import os
 import time
+import uvicorn

 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -193,160 +195,111 @@ def health_check():
     """Health check endpoint"""
     return {"status": "healthy", "model_loaded": model is not None}

+# Create FastAPI application
+app = FastAPI(
+    title="Qwen3 Embedding API",
+    description="A stable API for generating text embeddings using the Qwen3-Embedding-0.6B model",
+    version="1.0.0"
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# FastAPI endpoints
+@app.get("/")
+async def root():
+    """Root endpoint with API information"""
+    return {
+        "message": "Qwen3 Embedding API",
+        "version": "1.0.0",
+        "model": "Qwen3-Embedding-0.6B",
+        "endpoints": {
+            "health": "/health",
+            "predict": "/api/predict",
+            "docs": "/docs"
         }
+    }
+
+@app.get("/health")
+async def health():
+    """Health check endpoint"""
+    return health_check()
+
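As a quick sanity check of the new read-only endpoints, a client can hit `/` and `/health` once the Space is up. This is a minimal sketch, not part of the diff; `BASE_URL` is a placeholder (use the Space's public URL, or `http://localhost:7860` when running app.py locally):

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder: substitute the deployed Space URL

# Root endpoint: returns API metadata and the available endpoint paths
print(requests.get(f"{BASE_URL}/").json())

# Health endpoint: wraps health_check(), reporting whether the model is loaded
print(requests.get(f"{BASE_URL}/health").json())
# e.g. {'status': 'healthy', 'model_loaded': True}
```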
+
@app.post("/api/predict")
|
| 235 |
+
async def predict(data: dict):
|
| 236 |
+
"""Main prediction endpoint for embeddings"""
|
| 237 |
+
try:
|
| 238 |
+
if "data" not in data:
|
| 239 |
+
raise HTTPException(status_code=400, detail="Missing 'data' field in request")
|
| 240 |
|
| 241 |
+
input_data = data["data"]
|
|
|
|
|
|
|
| 242 |
|
| 243 |
+
# Handle single text or batch texts
|
| 244 |
+
if isinstance(input_data, str):
|
| 245 |
+
# Single text
|
| 246 |
+
embeddings = generate_embeddings(input_data)
|
| 247 |
+
return {"data": [embeddings]}
|
| 248 |
+
elif isinstance(input_data, list):
|
| 249 |
+
if len(input_data) > 0 and isinstance(input_data[0], str):
|
| 250 |
+
# Single text in list
|
| 251 |
+
embeddings = generate_embeddings(input_data[0])
|
| 252 |
+
return {"data": [embeddings]}
|
| 253 |
+
elif len(input_data) > 0 and isinstance(input_data[0], list):
|
| 254 |
+
# Batch texts
|
| 255 |
+
embeddings = generate_embeddings(input_data[0])
|
| 256 |
+
return {"data": [embeddings]}
|
| 257 |
+
else:
|
| 258 |
+
raise HTTPException(status_code=400, detail="Invalid data format")
|
| 259 |
+
else:
|
| 260 |
+
raise HTTPException(status_code=400, detail="Invalid data type")
|
| 261 |
|
| 262 |
+
except Exception as e:
|
| 263 |
+
logger.error(f"Error in predict endpoint: {str(e)}")
|
| 264 |
+
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
| 265 |
+
|
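The predict endpoint accepts the payload shapes the removed API documentation described: `{"data": ["Your text here"]}` for a single text and `{"data": [["Text 1", "Text 2", "Text 3"]]}` for a batch (a bare string also works). A minimal client sketch, assuming `BASE_URL` as above and that `generate_embeddings` (defined elsewhere in app.py) returns a list of floats per text:

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

# Single text: input_data[0] is a string, so the handler embeds that one text
single = requests.post(f"{BASE_URL}/api/predict",
                       json={"data": ["Your text here"]})
print(single.json())  # {"data": [<embedding>]}

# Batch: input_data[0] is a list of strings, passed to generate_embeddings as a batch
batch = requests.post(f"{BASE_URL}/api/predict",
                      json={"data": [["Text 1", "Text 2", "Text 3"]]})
print(batch.json())
```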
+@app.post("/api/similarity")
+async def similarity(data: dict):
+    """Compute similarity between two embeddings"""
+    try:
+        if "embedding1" not in data or "embedding2" not in data:
+            raise HTTPException(status_code=400, detail="Missing embedding1 or embedding2 field")

+        emb1 = data["embedding1"]
+        emb2 = data["embedding2"]

+        if not isinstance(emb1, list) or not isinstance(emb2, list):
+            raise HTTPException(status_code=400, detail="Embeddings must be lists")

+        sim = compute_similarity(emb1, emb2)
+        return {"similarity": sim}
+
+    except Exception as e:
+        logger.error(f"Error in similarity endpoint: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
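The similarity endpoint takes two embeddings as JSON lists of floats in `embedding1` and `embedding2` and returns `{"similarity": ...}` from `compute_similarity`, which is defined elsewhere in app.py (the removed Gradio UI labeled the result "Cosine Similarity"). A sketch chaining it with `/api/predict`, under the same assumptions as above:

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

# Embed two texts, then compare the resulting vectors
e1 = requests.post(f"{BASE_URL}/api/predict", json={"data": ["first text"]}).json()["data"][0]
e2 = requests.post(f"{BASE_URL}/api/predict", json={"data": ["second text"]}).json()["data"][0]

resp = requests.post(f"{BASE_URL}/api/similarity",
                     json={"embedding1": e1, "embedding2": e2})
print(resp.json())  # {"similarity": <float>}
```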

 def main():
     """Main function to run the application"""
+    logger.info("Starting Qwen3 Embedding Model API...")

     # Load model
     if not load_model():
         logger.error("Failed to load model. Exiting...")
         return

+    logger.info("Model loaded successfully. Starting FastAPI server...")

+    # Run with uvicorn
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=7860,
+        log_level="info"
     )

 if __name__ == "__main__":
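The server binds to `0.0.0.0:7860`, the port Hugging Face Spaces routes traffic to, so the FastAPI app should take over the slot the removed Gradio launch previously occupied without any change to the Space configuration.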