shashank-indukuri committed on
Commit
f7e2d47
·
verified ·
1 Parent(s): c064ea6

add streaming

Browse files
Files changed (1) hide show
  1. main.py +7 -0
main.py CHANGED
@@ -15,8 +15,10 @@ from slowapi.util import get_remote_address
15
  # Initialize the limiter
16
  limiter = Limiter(key_func=get_remote_address)
17
 
 
18
  # MongoDB client
19
  client = AsyncIOMotorClient('mongodb+srv://vanaraai:0YzPmeArBjsIVKqd@jobcluster.thp8ohx.mongodb.net/?retryWrites=true&w=majority')
 
20
  db = client['starlight_tales']
21
 
22
  # Load the model
@@ -25,6 +27,7 @@ llm = AutoModelForCausalLM.from_pretrained(
25
  model_type='llama',
26
  max_new_tokens=2000,
27
  threads=3,
 
28
  )
29
 
30
  # FastAPI app
@@ -95,6 +98,10 @@ async def stream_stories(item: Validation):
95
  """
96
  prompt = "<|user|>\nA bedtime tiny story for the children<\/s>\n<|assistant|>\n"
97
  return StreamingResponse(io.StringIO(llm(prompt)), media_type="text/plain")
 
 
 
 
98
 
99
 
100
  @app.post("/feedback")
 
15
  # Initialize the limiter
16
  limiter = Limiter(key_func=get_remote_address)
17
 
18
+
19
  # MongoDB client
20
  client = AsyncIOMotorClient('mongodb+srv://vanaraai:0YzPmeArBjsIVKqd@jobcluster.thp8ohx.mongodb.net/?retryWrites=true&w=majority')
21
+
22
  db = client['starlight_tales']
23
 
24
  # Load the model
 
27
  model_type='llama',
28
  max_new_tokens=2000,
29
  threads=3,
30
+ context_length = 4096,
31
  )
32
 
33
  # FastAPI app
 
98
  """
99
  prompt = "<|user|>\nA bedtime tiny story for the children<\/s>\n<|assistant|>\n"
100
  return StreamingResponse(io.StringIO(llm(prompt)), media_type="text/plain")
101
+ #working code for streaming
102
+ # for text in llm(f"{prompt}", stream=True):
103
+ # print(text, end="", flush=True)
104
+ # #return StreamingResponse(llm(prompt,stream=True), media_type="text/event-stream")
105
 
106
 
107
  @app.post("/feedback")