Spaces:
Sleeping
Sleeping
Commit
·
cf4196c
1
Parent(s):
30a261c
Update main.py
Browse files
main.py
CHANGED
|
@@ -41,30 +41,23 @@ llms = {
|
|
| 41 |
"suffix": "<|endoftext|><|assistant|>"
|
| 42 |
}
|
| 43 |
}
|
| 44 |
-
|
| 45 |
-
#Pydantic object
|
| 46 |
-
class validation(BaseModel):
|
| 47 |
-
prompt: str
|
| 48 |
-
llm: str
|
| 49 |
-
max_tokens: int = 512
|
| 50 |
-
nctx: int = 2048
|
| 51 |
|
| 52 |
|
| 53 |
#Fast API
|
| 54 |
app = FastAPI()
|
| 55 |
|
| 56 |
@app.post("/llm_on_cpu")
|
| 57 |
-
async def stream(item:
|
| 58 |
|
| 59 |
-
model = llms[item
|
| 60 |
prefix=model['prefix']
|
| 61 |
suffix=model['suffix']
|
| 62 |
-
nctx = item
|
| 63 |
-
max_tokens = item
|
| 64 |
user="""
|
| 65 |
{prompt}"""
|
| 66 |
|
| 67 |
llm = Llama(model_path="./code/"+model['file'], n_ctx=model['nctx'], verbose=False, n_threads=8)
|
| 68 |
|
| 69 |
-
prompt = f"{prefix}{user.replace('{prompt}', item
|
| 70 |
return llm(prompt, max_tokens=max_tokens)
|
|
|
|
| 41 |
"suffix": "<|endoftext|><|assistant|>"
|
| 42 |
}
|
| 43 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
#Fast API
|
| 47 |
app = FastAPI()
|
| 48 |
|
| 49 |
@app.post("/llm_on_cpu")
|
| 50 |
+
async def stream(item: dict):
    """Run a single completion for the model named in the request body.

    Args:
        item: Request payload. Required keys are ``'llm'`` (a key of the
            module-level ``llms`` mapping) and ``'prompt'`` (the user text).
            Optional keys are ``'nctx'`` (context size; defaults to the
            selected model's ``'nctx'`` entry) and ``'max_tokens'``
            (defaults to 512).

    Returns:
        Whatever the ``Llama`` instance returns when called with the
        assembled prompt.

    Raises:
        KeyError: If ``'llm'`` or ``'prompt'`` is missing from ``item``, or
            if ``item['llm']`` is not a key of ``llms``.
    """
    model = llms[item['llm']]
    prefix = model['prefix']
    suffix = model['suffix']

    # Optional overrides: use the caller's value when present, otherwise
    # fall back to the per-model context size / the fixed token budget.
    nctx = item.get('nctx', model['nctx'])
    max_tokens = item.get('max_tokens', 512)

    # The user turn is a newline followed by the prompt text.
    user = """
{prompt}"""

    # Build the model with the resolved context size so that a request-level
    # 'nctx' actually takes effect.
    llm = Llama(model_path="./code/" + model['file'], n_ctx=nctx, verbose=False, n_threads=8)

    user_turn = user.replace('{prompt}', item['prompt'])
    prompt = f"{prefix}{user_turn}{suffix}"
    return llm(prompt, max_tokens=max_tokens)
|