AJ STUDIOZ committed on
Commit
4534aef
·
1 Parent(s): 5528539

Revert to HuggingFace InferenceClient - Cloud-based solution for low-spec systems

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +18 -21
  3. requirements.txt +1 -1
README.md CHANGED
@@ -10,7 +10,7 @@ license: mit
10
 
11
  # 🚀 AJ STUDIOZ DeepSeek API
12
 
13
- Enterprise-grade AI API powered by **DeepSeek-R1-Distill-Qwen-1.5B** - Advanced reasoning in a compact 1.5B parameter model.
14
 
15
  ![Status](https://img.shields.io/badge/Status-Online-success?style=for-the-badge)
16
  ![Model](https://img.shields.io/badge/Model-DeepSeek--R1-blue?style=for-the-badge)
 
10
 
11
  # 🚀 AJ STUDIOZ DeepSeek API
12
 
13
+ Enterprise-grade AI API powered by **Qwen2.5-Coder-0.5B** - Fast, reliable, and excellent for coding tasks.
14
 
15
  ![Status](https://img.shields.io/badge/Status-Online-success?style=for-the-badge)
16
  ![Model](https://img.shields.io/badge/Model-DeepSeek--R1-blue?style=for-the-badge)
app.py CHANGED
@@ -3,37 +3,35 @@ from fastapi.responses import JSONResponse, StreamingResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from typing import Optional, List, Dict, Any
5
  from huggingface_hub import InferenceClient
 
6
  import os
7
  import json
8
  import secrets
9
  from datetime import datetime
10
- import time
11
 
12
- # Hugging Face API configuration
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
- # Use TinyLlama - Fast and reliable
 
 
 
16
  MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
17
- API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
18
 
19
- def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
20
  """Query Hugging Face Inference API using InferenceClient"""
21
  try:
22
- # Initialize InferenceClient with token if available
23
- client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
24
-
25
- # Use text_generation method
26
  result = client.text_generation(
27
  prompt,
28
  model=MODEL_NAME,
29
  max_new_tokens=min(max_tokens, 500),
30
  temperature=temperature,
31
- top_p=0.95,
32
  return_full_text=False,
33
- do_sample=temperature > 0
34
  )
35
 
36
- # Return a compatible response object
37
  class SuccessResponse:
38
  status_code = 200
39
  def json(self):
@@ -41,9 +39,8 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
41
  text = result
42
 
43
  return SuccessResponse()
44
-
45
  except Exception as e:
46
- # Create a mock response for error handling
47
  class ErrorResponse:
48
  status_code = 500
49
  def json(self):
@@ -82,7 +79,7 @@ def extract_anthropic_key(x_api_key: Optional[str]) -> Optional[str]:
82
  app = FastAPI(
83
  title="AJ STUDIOZ DeepSeek API",
84
  version="1.0",
85
- description="Enterprise-grade AI API - Powered by DeepSeek-R1-Distill-Qwen-1.5B with advanced reasoning"
86
  )
87
 
88
  # Enable CORS
@@ -198,7 +195,7 @@ async def anthropic_messages(
198
  prompt_parts.append("Assistant:")
199
  full_prompt = "\n\n".join(prompt_parts)
200
 
201
- response = query_hf_model(full_prompt, max_tokens, temperature)
202
 
203
  if response.status_code == 200:
204
  result = response.json()
@@ -315,7 +312,7 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
315
  prompt = "\n\n".join(prompt_parts) + "\n\nAssistant:"
316
  completion_id = f"chatcmpl-{secrets.token_hex(12)}"
317
 
318
- response = query_hf_model(prompt, max_tokens, temperature)
319
 
320
  if response.status_code == 200:
321
  result = response.json()
@@ -372,7 +369,7 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
372
  if not prompt:
373
  raise HTTPException(status_code=400, detail="Prompt is required")
374
 
375
- response = query_hf_model(prompt, max_tokens, temperature)
376
 
377
  if response.status_code == 200:
378
  result = response.json()
@@ -420,7 +417,7 @@ async def chat(request: Request):
420
  # Simple prompt for Qwen
421
  full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
422
 
423
- response = query_hf_model(full_message, 500, 0.7)
424
 
425
  if response.status_code == 200:
426
  result = response.json()
@@ -458,7 +455,7 @@ async def generate(request: Request):
458
  if not prompt:
459
  return JSONResponse({"error": "Prompt is required"}, status_code=400)
460
 
461
- response = query_hf_model(prompt, max_tokens, temperature)
462
 
463
  if response.status_code == 200:
464
  result = response.json()
@@ -489,7 +486,7 @@ async def health():
489
  """Health check endpoint"""
490
  try:
491
  # Quick test of the model
492
- test_response = query_hf_model("Hello", 10, 0.7)
493
  model_healthy = test_response.status_code == 200
494
 
495
  return {
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from typing import Optional, List, Dict, Any
5
  from huggingface_hub import InferenceClient
6
+ import time
7
  import os
8
  import json
9
  import secrets
10
  from datetime import datetime
 
11
 
12
+ # Hugging Face API configuration
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
+ # Initialize Inference Client
16
+ client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
17
+
18
+ # Use TinyLlama - Small, fast, and reliable
19
  MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
20
 
21
+ def query_ollama_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
22
  """Query Hugging Face Inference API using InferenceClient"""
23
  try:
24
+ # Use text generation
 
 
 
25
  result = client.text_generation(
26
  prompt,
27
  model=MODEL_NAME,
28
  max_new_tokens=min(max_tokens, 500),
29
  temperature=temperature,
 
30
  return_full_text=False,
31
+ do_sample=temperature > 0,
32
  )
33
 
34
+ # Create response object
35
  class SuccessResponse:
36
  status_code = 200
37
  def json(self):
 
39
  text = result
40
 
41
  return SuccessResponse()
 
42
  except Exception as e:
43
+ # Create error response
44
  class ErrorResponse:
45
  status_code = 500
46
  def json(self):
 
79
  app = FastAPI(
80
  title="AJ STUDIOZ DeepSeek API",
81
  version="1.0",
82
+ description="Enterprise-grade AI API - Powered by local DeepSeek-R1:8B with advanced reasoning"
83
  )
84
 
85
  # Enable CORS
 
195
  prompt_parts.append("Assistant:")
196
  full_prompt = "\n\n".join(prompt_parts)
197
 
198
+ response = query_ollama_model(full_prompt, max_tokens, temperature)
199
 
200
  if response.status_code == 200:
201
  result = response.json()
 
312
  prompt = "\n\n".join(prompt_parts) + "\n\nAssistant:"
313
  completion_id = f"chatcmpl-{secrets.token_hex(12)}"
314
 
315
+ response = query_ollama_model(prompt, max_tokens, temperature)
316
 
317
  if response.status_code == 200:
318
  result = response.json()
 
369
  if not prompt:
370
  raise HTTPException(status_code=400, detail="Prompt is required")
371
 
372
+ response = query_ollama_model(prompt, max_tokens, temperature)
373
 
374
  if response.status_code == 200:
375
  result = response.json()
 
417
  # Simple prompt for Qwen
418
  full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
419
 
420
+ response = query_ollama_model(full_message, 500, 0.7)
421
 
422
  if response.status_code == 200:
423
  result = response.json()
 
455
  if not prompt:
456
  return JSONResponse({"error": "Prompt is required"}, status_code=400)
457
 
458
+ response = query_ollama_model(prompt, max_tokens, temperature)
459
 
460
  if response.status_code == 200:
461
  result = response.json()
 
486
  """Health check endpoint"""
487
  try:
488
  # Quick test of the model
489
+ test_response = query_ollama_model("Hello", 10, 0.7)
490
  model_healthy = test_response.status_code == 200
491
 
492
  return {
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
- huggingface-hub==0.20.2
4
  python-multipart==0.0.6
 
 
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
 
3
  python-multipart==0.0.6
4
+ huggingface-hub==0.20.2