Alvin3y1 committed on
Commit
e35e970
·
verified ·
1 Parent(s): 081423f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -4,24 +4,14 @@ import uvicorn
4
  from fastapi import FastAPI, WebSocket
5
  from fastapi.responses import PlainTextResponse
6
 
7
- API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
 
8
  HEADERS = {
9
- "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0",
10
- "Accept": "text/event-stream",
11
- "Accept-Language": "en-US,en;q=0.5",
12
- "Accept-Encoding": "gzip, deflate, br, zstd",
13
- "Referer": "https://deepinfra.com/",
14
- "Content-Type": "application/json",
15
- "X-Deepinfra-Source": "model-embed",
16
- "Origin": "https://deepinfra.com",
17
- "Sec-GPC": "1",
18
- "Connection": "keep-alive",
19
- "Sec-Fetch-Dest": "empty",
20
- "Sec-Fetch-Mode": "cors",
21
- "Sec-Fetch-Site": "same-site",
22
- "Priority": "u=0",
23
  }
24
- MODEL = "Qwen/Qwen3-Coder-480B-A35B-Instruct"
25
 
26
  app = FastAPI()
27
 
@@ -50,11 +40,7 @@ async def websocket_endpoint(websocket: WebSocket):
50
  payload = {
51
  "model": MODEL,
52
  "messages": chat_history,
53
- "stream": True,
54
- "stream_options": {
55
- "include_usage": True,
56
- "continuous_usage_stats": True
57
- }
58
  }
59
 
60
  full_response = ""
@@ -69,11 +55,25 @@ async def websocket_endpoint(websocket: WebSocket):
69
  break
70
  try:
71
  data = json.loads(line_content)
72
- delta = data.get("choices", [{}])[0].get("delta", {})
73
- text_chunk = delta.get("content", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  if text_chunk:
75
  await websocket.send_text(text_chunk)
76
  full_response += text_chunk
 
77
  except Exception:
78
  continue
79
  except Exception:
 
4
  from fastapi import FastAPI, WebSocket
5
  from fastapi.responses import PlainTextResponse
6
 
7
+ # Updated API Configuration
8
+ API_URL = "https://theoldllm.vercel.app/api/proxy?provider=p7"
9
  HEADERS = {
10
+ "accept": "*/*",
11
+ "content-type": "application/json",
12
+ "origin": "https://theoldllm.vercel.app",
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
+ MODEL = "gemini-3-pro-preview"
15
 
16
  app = FastAPI()
17
 
 
40
  payload = {
41
  "model": MODEL,
42
  "messages": chat_history,
43
+ "stream": True
 
 
 
 
44
  }
45
 
46
  full_response = ""
 
55
  break
56
  try:
57
  data = json.loads(line_content)
58
+ choices = data.get("choices", [{}])
59
+ if not choices:
60
+ continue
61
+
62
+ delta = choices[0].get("delta", {})
63
+
64
+ # Handle "Reasoning" (Thinking process) if present
65
+ reasoning_chunk = delta.get("reasoning_content")
66
+ if reasoning_chunk:
67
+ await websocket.send_text(reasoning_chunk)
68
+ full_response += reasoning_chunk
69
+
70
+ # Handle actual content
71
+ # We use (delta.get("content") or "") to handle cases where content is null
72
+ text_chunk = delta.get("content")
73
  if text_chunk:
74
  await websocket.send_text(text_chunk)
75
  full_response += text_chunk
76
+
77
  except Exception:
78
  continue
79
  except Exception: