Sam3838 committed on
Commit
6955f97
·
verified ·
1 Parent(s): 4bdc240

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +18 -0
  2. main.py +86 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image as base
FROM python:3.11-slim

# Create an unprivileged user (UID 1000) so the app does not run as root
RUN useradd --create-home --uid 1000 user

# Set working directory
WORKDIR /app

# Copy requirements and install dependencies (done first so this layer is
# reused when only application code changes)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code, owned by the unprivileged user
COPY --chown=user . .

# Drop root privileges for everything that runs from here on
USER user

# Expose port 7860 for Hugging Face Spaces
EXPOSE 7860

# Command to run the FastAPI app with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI, HTTPException, Request
3
+ from pydantic import BaseModel
4
+ import httpx
5
+ from dotenv import load_dotenv
6
+
7
# Load environment variables via python-dotenv before any of them are read.
load_dotenv()

app = FastAPI()

# Upstream inference endpoint; the HF_ENDPOINT environment variable overrides
# the built-in default URL.
HF_ENDPOINT = os.environ.get(
    "HF_ENDPOINT",
    "https://yv6q340a33xhe4bs.us-east-1.aws.endpoints.huggingface.cloud",
)
# Optional bearer token for the upstream endpoint; None when HF_API_KEY is unset.
HF_API_KEY = os.environ.get("HF_API_KEY")

# Headers sent with every upstream request. The Authorization entry is
# included only when a non-empty API key is configured.
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    **({"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}),
}
21
+
22
class EmbeddingRequest(BaseModel):
    """Request body accepted by the ``/v1/embeddings`` route."""

    # Text to embed; forwarded upstream under the "inputs" key.
    inputs: str
    # Extra options forwarded upstream under the "parameters" key
    # (empty when the caller omits them).
    parameters: dict = {}
25
+
26
class OpenAIEmbeddingData(BaseModel):
    """One entry of the ``data`` list in an OpenAI-style embeddings response."""

    # Object type tag; the embeddings route sets it to "embedding".
    object: str
    # Embedding payload as received from the upstream endpoint.
    embedding: list
    # Position of this entry within the response's ``data`` list.
    index: int
30
+
31
class OpenAIEmbeddingResponse(BaseModel):
    """Top-level body returned by the ``/v1/embeddings`` route."""

    # Object type tag; the embeddings route sets it to "list".
    object: str
    # Embedding entries. Typed so that validation and the generated API
    # schema describe the element shape instead of an opaque list.
    data: list[OpenAIEmbeddingData]
    # Name of the model reported to the client.
    model: str
    # Usage counters (the embeddings route fills in "prompt_tokens" and
    # "total_tokens").
    usage: dict
36
+
37
@app.post("/v1/embeddings", response_model=OpenAIEmbeddingResponse)
async def embeddings(request: EmbeddingRequest):
    """Proxy an embedding request to the Hugging Face endpoint.

    The request's ``inputs`` and ``parameters`` are POSTed to ``HF_ENDPOINT``
    and the result is wrapped in an OpenAI-style response with a single
    ``data`` entry at index 0.

    Args:
        request: Validated request body.

    Returns:
        OpenAIEmbeddingResponse wrapping the upstream embedding payload.

    Raises:
        HTTPException: status 500 when the upstream call fails or returns an
            error status, when its body is not valid JSON, or when the
            decoded payload is neither a list nor a dict carrying a list
            under ``"embeddings"``.
    """
    payload = {"inputs": request.inputs, "parameters": request.parameters}

    async with httpx.AsyncClient() as client:
        try:
            upstream = await client.post(HF_ENDPOINT, json=payload, headers=headers)
            upstream.raise_for_status()
        except httpx.HTTPError as exc:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to fetch embeddings: {exc}",
            ) from exc

    try:
        hf_data = upstream.json()
    except ValueError as exc:
        # JSON decode errors are ValueError subclasses; report them explicitly
        # instead of letting them escape as an unhandled server error.
        raise HTTPException(
            status_code=500,
            detail="Failed to fetch embeddings: upstream response is not valid JSON",
        ) from exc

    # The payload is either the embedding list itself or a dict that wraps it
    # under the "embeddings" key.
    if isinstance(hf_data, dict) and "embeddings" in hf_data:
        vector = hf_data["embeddings"]
    else:
        vector = hf_data

    # Anything that is not a list (e.g. an error object) cannot populate the
    # response model; fail with a readable message rather than an opaque
    # validation error.
    if not isinstance(vector, list):
        raise HTTPException(
            status_code=500,
            detail="Failed to fetch embeddings: unexpected upstream response format",
        )

    # NOTE(review): the list is passed through unchanged. If the endpoint
    # answers with a batch-shaped [[...]] payload, the client receives it
    # nested - confirm the upstream response shape.
    return OpenAIEmbeddingResponse(
        object="list",
        data=[OpenAIEmbeddingData(object="embedding", embedding=vector, index=0)],
        model="Qwen3-Embedding-8B",
        # Token usage is not computed here; both counters are fixed at zero.
        usage={"prompt_tokens": 0, "total_tokens": 0},
    )
67
+
68
@app.get("/health")
async def health():
    """Health probe: always answers with ``{"status": "ok"}``."""
    return dict(status="ok")
71
+
72
@app.get("/v1/models")
@app.get("/v1/modals", include_in_schema=False)
async def list_modals():
    """Return the OpenAI-style model listing for this proxy.

    Served at ``/v1/models``, the path OpenAI-compatible clients request.
    The original, misspelled ``/v1/modals`` path stays registered as an
    alias (hidden from the generated API schema) so existing callers keep
    working.

    Returns:
        A dict with ``"object": "list"`` and a single model entry in
        ``"data"``.
    """
    model_entry = {
        "id": "Qwen3-Embedding-8B",
        "object": "model",
        "owned_by": "qwen",
        "permission": [],
        "root": "Qwen3-Embedding-8B",
        "parent": None,
    }
    return {"object": "list", "data": [model_entry]}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ httpx
4
+ python-dotenv