Adi362 committed on
Commit 992fbe7 · verified · 1 Parent(s): f5c37d3

Update app.py

Files changed (1)
  1. app.py +104 -31
app.py CHANGED
@@ -2,22 +2,35 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
 from typing import List, Optional
+import httpx
+import os
 
 app = FastAPI()
 
 
-SYSTEM_PROMPT = """You are Edyx.
-You are a helpful, harmless, and honest AI assistant.
-"""
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+GROQ_MODEL = "llama-3.3-70b-versatile"
 
-llm = Llama(
-    model_path="/models/model.gguf",
-    n_ctx=4096,
-    n_threads=2,
-    n_batch=128,
-    verbose=False
-)
+SYSTEM_PROMPT = """You are a helpful, harmless, and honest AI assistant.
+Provide clear and conversational responses."""
+
+
+local_llm = None
+
+def get_local_llm():
+    global local_llm
+    if local_llm is None:
+        print("Loading local fallback model...")
+        local_llm = Llama(
+            model_path="/models/model.gguf",
+            n_ctx=4096,
+            n_threads=2,
+            n_batch=128,
+            verbose=False
+        )
+    return local_llm
 
 class Message(BaseModel):
     role: str
@@ -25,40 +38,100 @@ class Message(BaseModel):
 
 class ChatRequest(BaseModel):
     messages: List[Message]
-
     max_tokens: Optional[int] = 1024
     temperature: Optional[float] = 0.7
    repetition_penalty: Optional[float] = 1.1
 
-@app.post("/v1/chat")
-def chat(req: ChatRequest):
-
+@app.get("/")
+def root():
+    return {"status": "edyx convo model running", "mode": "groq-primary"}
+
+async def call_groq_api(messages: List[Message], max_tokens: int, temperature: float):
+    """Try to get response from Groq API"""
+    if not GROQ_API_KEY:
+        raise Exception("GROQ_API_KEY not configured")
+
+    groq_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    for m in messages:
+        groq_messages.append({"role": m.role, "content": m.content})
+
+    async with httpx.AsyncClient(timeout=45.0) as client:
+        response = await client.post(
+            GROQ_API_URL,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {GROQ_API_KEY}"
+            },
+            json={
+                "model": GROQ_MODEL,
+                "messages": groq_messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature
+            }
+        )
+
+    if response.status_code != 200:
+        raise Exception(f"Groq API error: {response.status_code} - {response.text}")
+
+    data = response.json()
+    return data["choices"][0]["message"]["content"], data["usage"]["total_tokens"]
+
+def call_local_model(messages: List[Message], max_tokens: int, temperature: float, repetition_penalty: float):
+    """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
+    llm = get_local_llm()
+
     prompt = SYSTEM_PROMPT + "\n\n"
-
-    for m in req.messages:
+    for m in messages:
         role = m.role.lower()
         if role == "system":
-
-            prompt = f"{m.content}\n\n"
+            prompt = f"{m.content}\n\n"
         else:
-            prompt += f"{role}: {m.content}\n"
-
+            prompt += f"{role}: {m.content}\n"
     prompt += "assistant:"
 
-
     output = llm(
         prompt,
-        max_tokens=req.max_tokens,
-        temperature=req.temperature,
+        max_tokens=max_tokens,
+        temperature=temperature,
         top_p=0.9,
-        repeat_penalty=req.repetition_penalty,
+        repeat_penalty=repetition_penalty,
         stop=["user:", "assistant:", "<|end|>", "User:"]
     )
 
-    text = output["choices"][0]["text"].strip()
-
-    return {
-        "model": "edyx-convo",
-        "text": text,
-        "tokens": output["usage"]["total_tokens"]
-    }
+    return output["choices"][0]["text"].strip(), output["usage"]["total_tokens"]
 
+@app.post("/v1/chat")
+async def chat(req: ChatRequest):
+    # Try Groq API first (fast path)
+    try:
+        text, tokens = await call_groq_api(req.messages, req.max_tokens, req.temperature)
+        return {
+            "model": "edyx-convo",
+            "text": text,
+            "tokens": tokens,
+            "source": "primary"
+        }
+    except Exception as e:
+        print(f"Groq API failed: {e}, falling back to local model...")
+
+    # Fallback to local model - YOUR ORIGINAL CODE
+    try:
+        text, tokens = call_local_model(
+            req.messages,
+            req.max_tokens,
+            req.temperature,
+            req.repetition_penalty
+        )
+        return {
+            "model": "edyx-convo",
+            "text": text,
+            "tokens": tokens,
+            "source": "fallback"
+        }
+    except Exception as e:
+        return {
+            "model": "edyx-convo",
+            "text": f"Error: Both primary and fallback failed. {str(e)}",
+            "tokens": 0,
+            "source": "error"
+        }
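
For reference, a minimal client sketch for the /v1/chat endpoint as changed in this commit. The base URL is an assumption; the commit does not show how or where the app is served.

# Minimal client sketch for the updated /v1/chat endpoint.
# http://localhost:8000 is an assumption; deployment details are
# not part of this commit.
import httpx

payload = {
    "messages": [{"role": "user", "content": "Hello!"}],
    "max_tokens": 256,
    "temperature": 0.7,
    "repetition_penalty": 1.1,
}

resp = httpx.post("http://localhost:8000/v1/chat", json=payload, timeout=60.0)
data = resp.json()

# "source" reports which path answered: "primary" (Groq API),
# "fallback" (local GGUF model), or "error" (both paths failed).
print(data["source"], data["text"])

Note that GROQ_API_KEY is read from the environment at module load, so it must be set before the server starts; if it is absent, call_groq_api raises immediately and every request falls through to the local model.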