a9 committed on
Commit
6f6890a
·
verified ·
1 Parent(s): 4e6fa27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -415,28 +415,47 @@ async def read_root(response: Response):
415
  </body>
416
  </html>'''
417
 
418
- from openai import OpenAI
419
 
420
- client = OpenAI(base_url="http://localhost:8080/v1", api_key="no-key-required")
 
 
 
 
 
 
 
 
 
421
 
422
  class ChatRequest(BaseModel):
423
  """Request model for the chat endpoint."""
424
  prompt: str
425
 
 
 
 
 
 
 
 
426
  @app.post("/response")
427
  async def handle_chat(chat_request: ChatRequest, token: str = Cookie(None)):
 
428
  if token in Tokens:
429
  i = Tokens.index(token)
430
  History[i].append({"role": "user", "content": chat_request.prompt})
431
 
432
- stream = client.chat.completions.create(
433
- model="",
434
- messages=History[i],
435
- )
436
- History[i].append({"role": "assistant", "content": stream.choices[0].message.content})
 
 
 
437
  return {"text": stream.choices[0].message.content,
438
- "time": int((stream.timings["prompt_ms"] + stream.timings["predicted_ms"])/1000 - 3),
439
- "t_per_sec": round(stream.timings["predicted_per_second"] + 0.2, 2)}
440
  else: return 'Please stop. Just refresh the page.'
441
 
442
  @app.post("/history")
 
415
  </body>
416
  </html>'''
417
 
 
418
 
419
# Gemini API client setup (replaces the previous local OpenAI-compatible server).
from google import genai
from google.genai import types
import requests


# API key is read from the environment; set API_KEY before starting the app.
Api_key = os.getenv('API_KEY')
# System prompt injected into the Gemma-format conversation built in handle_chat.
System_instruction = '''**System Prompt for a Programmer-Oriented Coding Assistant:**\n\n> You are a highly focused, fast, and expert-level coding assistant built for professional programmers.\n> Your primary role is **to assist with code writing, debugging, refactoring, optimization, and architecture**.\n> Avoid unnecessary explanations unless asked. Do not teach—**support the user like a senior pair programmer** who assumes context and skill. Prioritize clean, correct, and efficient code.\n\n> Always:\n> * Get straight to the point.\n> * Suggest the most practical and scalable solution.\n> * Respond with complete code blocks when needed.\n> * Use strong defaults and modern conventions.\n> * Assume the user knows what they're doing.\n> * Think ahead: anticipate potential pitfalls or better approaches.\n> * Give fast, minimal answers when asked for quick help.\n\n> Only elaborate if specifically requested (e.g., “explain,” “why,” “teach,” “verbose”)'''


# Module-level client shared by all requests; gen() calls it.
client = genai.Client(api_key=Api_key)
429
 
430
class ChatRequest(BaseModel):
    """Request model for the chat endpoint."""
    # Raw user prompt text sent to the /response endpoint.
    prompt: str
433
 
434
def gen(prompt):
    """Return the model's text reply for *prompt* via the shared Gemini client."""
    result = client.models.generate_content(model="gemma-3-4b-it", contents=prompt)
    return result.text
440
+
441
@app.post("/response")
async def handle_chat(chat_request: ChatRequest, token: str = Cookie(None)):
    """Generate an assistant reply for the caller's prompt.

    Looks up the caller's session by the ``token`` cookie, appends the prompt
    to that session's history, flattens the history into a Gemma-format
    conversation string, and asks the model for a reply via ``gen``.

    Returns a dict with the reply text, elapsed wall-clock seconds, and a
    tokens-per-second placeholder; returns a plain string when the token is
    unknown.
    """
    start = time.time()
    if token in Tokens:
        i = Tokens.index(token)
        History[i].append({"role": "user", "content": chat_request.prompt})

        # Flatten the stored history into Gemma's turn-delimited prompt format.
        text = '<start_of_turn>system\n' + System_instruction + '<end_of_turn>\n<start_of_turn>user\n'
        for j in History[i]:
            if j['role'] == 'user':
                text = text + j['content'] + '<end_of_turn>\n<start_of_turn>model\n'
            else:
                text = text + j['content'] + '<end_of_turn>\n<start_of_turn>user\n'

        # BUG FIX: was gen(prompt) — `prompt` is undefined in this scope; the
        # built conversation string is `text`.
        reply = gen(text)
        History[i].append({"role": "assistant", "content": reply})
        end = time.time()
        # BUG FIX: gen() returns a plain string, not an OpenAI-style response
        # object, so return it directly (old code read stream.choices[0]...).
        # BUG FIX: time.time() deltas are already in seconds — no /1000 scaling.
        return {"text": reply,
                "time": round(end - start, 2),
                "t_per_sec": 0}
    else:
        return 'Please stop. Just refresh the page.'
460
 
461
  @app.post("/history")