Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException, Security, Header
 from pydantic import BaseModel
 from llama_cpp import Llama
 from typing import List, Optional
@@ -7,16 +7,15 @@ import os
 
 app = FastAPI()
 
-GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
-GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
-GROQ_MODEL = "llama-3.3-70b-versatile"
+SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
+SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+SERVICE_MODEL = "llama-3.3-70b-versatile"
 
+EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")
 
 SYSTEM_PROMPT = """You are a helpful, harmless, and honest AI assistant.
 Provide clear and conversational responses."""
 
-
 local_llm = None
 
 def get_local_llm():
@@ -42,42 +41,45 @@ class ChatRequest(BaseModel):
     temperature: Optional[float] = 0.7
     repetition_penalty: Optional[float] = 1.1
 
+async def verify_token(x_edyx_token: str = Header(None)):
+    if EDYX_ACCESS_TOKEN and x_edyx_token != EDYX_ACCESS_TOKEN:
+        raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
+    return x_edyx_token
+
 @app.get("/")
 def root():
-    return {"status": "edyx convo model running", "mode": "
+    return {"status": "edyx convo model running", "mode": "accelerated-primary"}
 
-async def
-
-
-        raise Exception("GROQ_API_KEY not configured")
+async def call_service_api(messages: List[Message], max_tokens: int, temperature: float):
+    if not SERVICE_API_KEY:
+        raise Exception("Service API key not configured")
 
-
+    service_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     for m in messages:
-
+        service_messages.append({"role": m.role, "content": m.content})
 
     async with httpx.AsyncClient(timeout=45.0) as client:
         response = await client.post(
-            GROQ_API_URL,
+            SERVICE_API_URL,
            headers={
                "Content-Type": "application/json",
-                "Authorization": f"Bearer {GROQ_API_KEY}"
+                "Authorization": f"Bearer {SERVICE_API_KEY}"
            },
            json={
-                "model": GROQ_MODEL,
-                "messages":
+                "model": SERVICE_MODEL,
+                "messages": service_messages,
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        )
 
        if response.status_code != 200:
-            raise Exception(f"
+            raise Exception(f"Service API error: {response.status_code} - {response.text}")
 
        data = response.json()
        return data["choices"][0]["message"]["content"], data["usage"]["total_tokens"]
 
 def call_local_model(messages: List[Message], max_tokens: int, temperature: float, repetition_penalty: float):
-    """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
     llm = get_local_llm()
 
     prompt = SYSTEM_PROMPT + "\n\n"
@@ -100,11 +102,10 @@ def call_local_model(messages: List[Message], max_tokens: int, temperature: floa
 
     return output["choices"][0]["text"].strip(), output["usage"]["total_tokens"]
 
-@app.post("/v1/chat")
+@app.post("/v1/chat", dependencies=[Security(verify_token)])
 async def chat(req: ChatRequest):
-    # Try Groq API first (fast path)
     try:
-        text, tokens = await
+        text, tokens = await call_service_api(req.messages, req.max_tokens, req.temperature)
         return {
             "model": "edyx-convo",
             "text": text,
@@ -112,9 +113,8 @@ async def chat(req: ChatRequest):
             "source": "primary"
         }
     except Exception as e:
-        print(f"
+        print(f"Service API failed: {e}, falling back to local model...")
 
-        # Fallback to local model - YOUR ORIGINAL CODE
         try:
            text, tokens = call_local_model(
                req.messages,
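
Usage note: with this commit, calls to /v1/chat must carry the access token in an x-edyx-token header (FastAPI's Header(None) on the x_edyx_token parameter maps underscores to hyphens by default); requests without a matching token get a 403 when EDYX_ACCESS_TOKEN is set. Below is a minimal client sketch, not part of the commit: the base URL is a placeholder for wherever the Space is hosted, and the payload fields follow the ChatRequest model shown in the diff.

import os
import httpx

# Placeholder endpoint; substitute the actual Space URL.
BASE_URL = "https://your-space.hf.space"

def chat(prompt: str) -> dict:
    # verify_token reads the token from the "x-edyx-token" request header.
    headers = {"x-edyx-token": os.environ["EDYX_ACCESS_TOKEN"]}
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256,
        "temperature": 0.7,
    }
    resp = httpx.post(f"{BASE_URL}/v1/chat", json=payload, headers=headers, timeout=60.0)
    resp.raise_for_status()  # a 403 here means the token did not match EDYX_ACCESS_TOKEN
    return resp.json()       # {"model": "edyx-convo", "text": ..., "source": ...}

if __name__ == "__main__":
    print(chat("Hello!")["text"])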