embedingHF committed on
Commit
4225683
·
verified ·
1 Parent(s): 07d8884

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. CMD.bash +14 -0
  2. New Text Document.txt +0 -0
  3. app.py +69 -0
  4. model.py +18 -0
  5. requirements.txt +6 -0
  6. test_request.py +21 -0
CMD.bash ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# 2. Create and activate a virtual environment (recommended)
python -m venv venv
source venv/bin/activate  # Linux/Mac
# venv\Scripts\activate   # Windows

# 3. Install dependencies
pip install -r requirements.txt

# 4. Load the model (the first run takes a little while)
python -c "from model import model_instance; print('Model ready')"

# 5. Start the server
uvicorn app:app --reload --host 0.0.0.0 --port 8000
New Text Document.txt ADDED
File without changes
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel, Field
3
+ from typing import List
4
+ from model import model_instance
5
+ import time
6
+ import logging
7
+
8
+ # Logging setup
9
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the request handlers in this file.
logger = logging.getLogger(__name__)

# Application object; the route decorators in this file register against it.
app = FastAPI(
    title="Sentence Embedding API",
    description="Aapke trained model se text embedding nikaalne ka API",
    version="1.0.0",
)
17
+
18
+ # Request body structure
19
# Request body for POST /embed: one text to turn into an embedding.
class TextInput(BaseModel):
    # Required, 1-512 characters. `examples` (a list) is the Pydantic v2
    # keyword; the singular `example` was an arbitrary extra keyword
    # argument, which Pydantic v2 deprecates.
    text: str = Field(
        ...,
        min_length=1,
        max_length=512,
        examples=["Mera naam Bahadur hai"],
    )
21
+
22
# Response body returned by POST /embed.
# Documented with comments rather than a docstring so the model's generated
# schema stays exactly as it was.
class EmbeddingResponse(BaseModel):
    embedding: List[float]  # vector returned by the model for the input text
    input_text: str  # the request text, echoed back
    inference_time_ms: float  # duration of the model call, in milliseconds
26
+
27
+ # Health check endpoint
28
# Landing route: returns a fixed message pointing callers at /docs.
@app.get("/")
def root():
    greeting = {"message": "API is running! Go to /docs for Swagger UI"}
    return greeting
31
+
32
# Health probe. NOTE: "model_loaded" is hard-coded to True; it is not derived
# from the model's actual state.
@app.get("/health")
def health_check():
    status_report = dict(status="healthy", model_loaded=True)
    return status_report
35
+
36
+ # Main prediction endpoint
37
@app.post("/embed", response_model=EmbeddingResponse)
async def get_embedding(input_data: TextInput):
    """Embed a single text and report how long the model call took.

    Returns the embedding, the echoed input text and the inference time in
    milliseconds (rounded to two decimals). Responds with HTTP 500 and the
    error message if anything fails.
    """
    try:
        # Only the first 50 characters are logged; lazy %-formatting produces
        # the same message as the previous f-string.
        logger.info("Processing text: %s...", input_data.text[:50])

        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        # NOTE(review): this call is synchronous, so the event loop is blocked
        # while the model runs. Moving it to a worker thread needs confirmation
        # that the model is safe to call concurrently.
        embedding = model_instance.get_embedding(input_data.text)
        elapsed_ms = (time.perf_counter() - started) * 1000  # milliseconds

        return EmbeddingResponse(
            embedding=embedding,
            input_text=input_data.text,
            inference_time_ms=round(elapsed_ms, 2),
        )
    except Exception as exc:
        # logger.exception logs at ERROR level like before, plus the traceback.
        logger.exception("Error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
55
+
56
+ # Batch processing (optional)
57
# Request body for POST /embed/batch: the texts to embed.
# NOTE(review): unlike TextInput, neither the number of texts nor the length
# of each text is bounded here.
class BatchTextInput(BaseModel):
    texts: List[str]
59
+
60
@app.post("/embed/batch")
async def get_batch_embeddings(input_data: BatchTextInput):
    """Embed every text in the request, one model call per text, in order.

    Returns a dict with ``results`` (one ``{"text", "embedding"}`` entry per
    input text) and ``count`` (the number of entries). Responds with HTTP 500
    and the error message if a model call fails, matching ``POST /embed``.
    """
    try:
        results = [
            {"text": text, "embedding": model_instance.get_embedding(text)}
            for text in input_data.texts
        ]
    except Exception as exc:
        # Same error handling as the single-text endpoint: log with traceback
        # and surface the failure as a 500 with the message as detail.
        logger.exception("Error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {"results": results, "count": len(results)}
model.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import torch
3
+
4
class EmbeddingModel:
    """Wrap a SentenceTransformer model and return embeddings as plain lists."""

    def __init__(self, model_name="embedingHF/Sentence_Transformer"):
        """Load the model onto the GPU when CUDA is available, else the CPU.

        Args:
            model_name: Identifier passed to ``SentenceTransformer``; your own
                Hugging Face model or any pre-trained one.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading model on {self.device}...")
        self.model = SentenceTransformer(model_name, device=self.device)
        print("Model loaded successfully!")

    def get_embedding(self, text: str) -> list:
        """Convert ``text`` to its embedding vector.

        Args:
            text: The text to encode.

        Returns:
            The embedding as a list of Python floats.
        """
        # Ask encode() for a NumPy array directly instead of requesting a
        # tensor and then converting tensor -> CPU -> NumPy -> list by hand.
        vector = self.model.encode(text, convert_to_numpy=True)
        return vector.tolist()
16
+
17
+ # Global instance (loaded once, not again and again)
18
+ model_instance = EmbeddingModel()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi>=0.104.1
2
+ uvicorn>=0.24.0
3
+ torch>=2.1.0
4
+ transformers>=4.35.0
5
+ sentence-transformers>=2.2.2
6
+ pydantic>=2.4.2
test_request.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ # Call the API
5
# Endpoint of the locally running embedding server.
url = "http://localhost:8000/embed"

payload = {
    "text": "Mujhe professional AI developer banna hai!",
}

# requests has no default timeout; without one this script would wait forever
# if the server accepted the connection but never answered.
response = requests.post(url, json=payload, timeout=30)

if response.status_code == 200:
    result = response.json()
    print(f"✅ Input: {result['input_text']}")
    print(f"📊 Embedding dimension: {len(result['embedding'])}")
    print(f"⚡ Time taken: {result['inference_time_ms']} ms")
    print(f"🔢 First 5 values: {result['embedding'][:5]}")
else:
    # Any non-200 status: show the code and the raw response body.
    print(f"❌ Error: {response.status_code}")
    print(response.text)