Spaces:

moriire
/

OpenGenAI

Sleeping

moriire committed on Apr 5, 2024

Commit

6a34b4c

verified ·

1 Parent(s): f88f764

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,13 @@ import llama_cpp
 import llama_cpp.llama_tokenizer
 from pydantic import BaseModel
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q4_0.gguf",
@@ -56,18 +63,10 @@ def index():
 @app.get("/health")
 def health():
     return {"status": "ok"}
-class GenModel(BaseModel):
-    question: str
-    system: str = "You are a story writing assistant."
-    temperature: float = 0.7
-    seed: int = 42
 # Chat Completion API
-@app.get("/generate_stream")
-async def complete(gen:GenModel
-) -> dict:
     try:
         st = time()
         output = llama.create_chat_completion(

 import llama_cpp.llama_tokenizer
 from pydantic import BaseModel
+class GenModel(BaseModel):
+    question: str
+    system: str = "You are a story writing assistant."
+    temperature: float = 0.7
+    seed: int = 42
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q4_0.gguf",
 @app.get("/health")
 def health():
     return {"status": "ok"}
 # Chat Completion API
+@app.get("/generate_stream/")
+async def complete(gen:GenModel):
     try:
         st = time()
         output = llama.create_chat_completion(