Adi362 committed on
Commit
99e15e7
·
verified ·
1 Parent(s): 5fba432

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from typing import List, Optional
@@ -7,16 +7,15 @@ import os
7
 
8
  app = FastAPI()
9
 
 
 
 
10
 
11
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
- GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
13
- GROQ_MODEL = "llama-3.3-70b-versatile"
14
-
15
 
16
  SYSTEM_PROMPT = """You are a helpful, harmless, and honest AI assistant.
17
  Provide clear and conversational responses."""
18
 
19
-
20
  local_llm = None
21
 
22
  def get_local_llm():
@@ -42,42 +41,45 @@ class ChatRequest(BaseModel):
42
  temperature: Optional[float] = 0.7
43
  repetition_penalty: Optional[float] = 1.1
44
 
 
 
 
 
 
45
  @app.get("/")
46
  def root():
47
- return {"status": "edyx convo model running", "mode": "groq-primary"}
48
 
49
- async def call_groq_api(messages: List[Message], max_tokens: int, temperature: float):
50
- """Try to get response from Groq API"""
51
- if not GROQ_API_KEY:
52
- raise Exception("GROQ_API_KEY not configured")
53
 
54
- groq_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
55
  for m in messages:
56
- groq_messages.append({"role": m.role, "content": m.content})
57
 
58
  async with httpx.AsyncClient(timeout=45.0) as client:
59
  response = await client.post(
60
- GROQ_API_URL,
61
  headers={
62
  "Content-Type": "application/json",
63
- "Authorization": f"Bearer {GROQ_API_KEY}"
64
  },
65
  json={
66
- "model": GROQ_MODEL,
67
- "messages": groq_messages,
68
  "max_tokens": max_tokens,
69
  "temperature": temperature
70
  }
71
  )
72
 
73
  if response.status_code != 200:
74
- raise Exception(f"Groq API error: {response.status_code} - {response.text}")
75
 
76
  data = response.json()
77
  return data["choices"][0]["message"]["content"], data["usage"]["total_tokens"]
78
 
79
  def call_local_model(messages: List[Message], max_tokens: int, temperature: float, repetition_penalty: float):
80
- """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
81
  llm = get_local_llm()
82
 
83
  prompt = SYSTEM_PROMPT + "\n\n"
@@ -100,11 +102,10 @@ def call_local_model(messages: List[Message], max_tokens: int, temperature: floa
100
 
101
  return output["choices"][0]["text"].strip(), output["usage"]["total_tokens"]
102
 
103
- @app.post("/v1/chat")
104
  async def chat(req: ChatRequest):
105
- # Try Groq API first (fast path)
106
  try:
107
- text, tokens = await call_groq_api(req.messages, req.max_tokens, req.temperature)
108
  return {
109
  "model": "edyx-convo",
110
  "text": text,
@@ -112,9 +113,8 @@ async def chat(req: ChatRequest):
112
  "source": "primary"
113
  }
114
  except Exception as e:
115
- print(f"Groq API failed: {e}, falling back to local model...")
116
 
117
- # Fallback to local model - YOUR ORIGINAL CODE
118
  try:
119
  text, tokens = call_local_model(
120
  req.messages,
 
1
+ from fastapi import FastAPI, HTTPException, Security, Header
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from typing import List, Optional
 
7
 
8
  app = FastAPI()
9
 
10
+ SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
11
+ SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
12
+ SERVICE_MODEL = "llama-3.3-70b-versatile"
13
 
14
+ EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")
 
 
 
15
 
16
  SYSTEM_PROMPT = """You are a helpful, harmless, and honest AI assistant.
17
  Provide clear and conversational responses."""
18
 
 
19
  local_llm = None
20
 
21
  def get_local_llm():
 
41
  temperature: Optional[float] = 0.7
42
  repetition_penalty: Optional[float] = 1.1
43
 
44
async def verify_token(x_edyx_token: str = Header(None)):
    """FastAPI dependency that gates endpoints on the ``X-Edyx-Token`` header.

    Returns the presented token (possibly ``None``) when access is granted.

    Raises:
        HTTPException: 403 when a token is configured and the header does not
            match it.

    NOTE(review): this is fail-open — when ``EDYX_ACCESS_TOKEN`` is unset in
    the environment, every request passes. Confirm that is intended for
    production deployments.
    """
    enforcement_enabled = bool(EDYX_ACCESS_TOKEN)
    if enforcement_enabled:
        if x_edyx_token != EDYX_ACCESS_TOKEN:
            raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
    return x_edyx_token
48
+
49
@app.get("/")
def root():
    """Liveness endpoint: reports that the service is up and which mode it runs in."""
    status_payload = {
        "status": "edyx convo model running",
        "mode": "accelerated-primary",
    }
    return status_payload
52
 
53
async def call_service_api(messages: List[Message], max_tokens: int, temperature: float):
    """Forward the conversation to the hosted completion service.

    Prepends ``SYSTEM_PROMPT`` as a system message, POSTs an OpenAI-style
    chat-completion payload to ``SERVICE_API_URL``, and returns a tuple of
    ``(reply_text, total_tokens)``.

    Raises:
        Exception: when ``SERVICE_API_KEY`` is not configured, or when the
            service responds with a non-200 status. The caller relies on this
            to fall back to the local model.
    """
    if not SERVICE_API_KEY:
        raise Exception("Service API key not configured")

    # System prompt goes first; then the conversation turns, in order.
    service_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    service_messages.extend({"role": m.role, "content": m.content} for m in messages)

    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {SERVICE_API_KEY}"
    }
    payload = {
        "model": SERVICE_MODEL,
        "messages": service_messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    # 45 s budget covers the full request; the client is closed on exit.
    async with httpx.AsyncClient(timeout=45.0) as client:
        response = await client.post(SERVICE_API_URL, headers=request_headers, json=payload)

    if response.status_code != 200:
        raise Exception(f"Service API error: {response.status_code} - {response.text}")

    data = response.json()
    return data["choices"][0]["message"]["content"], data["usage"]["total_tokens"]
81
 
82
  def call_local_model(messages: List[Message], max_tokens: int, temperature: float, repetition_penalty: float):
 
83
  llm = get_local_llm()
84
 
85
  prompt = SYSTEM_PROMPT + "\n\n"
 
102
 
103
  return output["choices"][0]["text"].strip(), output["usage"]["total_tokens"]
104
 
105
+ @app.post("/v1/chat", dependencies=[Security(verify_token)])
106
  async def chat(req: ChatRequest):
 
107
  try:
108
+ text, tokens = await call_service_api(req.messages, req.max_tokens, req.temperature)
109
  return {
110
  "model": "edyx-convo",
111
  "text": text,
 
113
  "source": "primary"
114
  }
115
  except Exception as e:
116
+ print(f"Service API failed: {e}, falling back to local model...")
117
 
 
118
  try:
119
  text, tokens = call_local_model(
120
  req.messages,