thangquang09 committed on
Commit
03aa457
·
1 Parent(s): 1a496eb

Add application file

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. app.py +65 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.12.9

# Keep the image root clean; all app files live under /app.
WORKDIR /app

# Install Python dependencies first so this layer is cached when only
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Install the Ollama runtime (backend for the llama3 model used by app.py).
RUN curl -fsSL https://ollama.com/install.sh | sh

COPY . .

# NOTE: the original `RUN ollama serve` was removed — it blocks the build
# forever (the server never exits) and a RUN-time process would not survive
# into the running container anyway. Start the Ollama server at container
# runtime, give it a moment to come up, then launch the API on port 7860.
CMD ["sh", "-c", "ollama serve & sleep 5 && uvicorn app:app --host 0.0.0.0 --port 7860"]
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from llama_index.llms.ollama import Ollama
5
+ from llama_index.core.llms import ChatMessage
6
+ import time
7
+
# FastAPI application instance; routes are registered below via decorators.
app = FastAPI()

class Generate(BaseModel):
    # Text produced by the model (or an error/hint message).
    response: str
    # Wall-clock generation time in seconds.
    duration: float

# Accumulated conversation turns as {"role": ..., "content": ...} dicts.
# NOTE(review): module-level state is shared across ALL requests/users —
# confirm a single global conversation is intended.
chat_history = []
# Ollama-backed LLM client; assumes an `ollama serve` instance with the
# "llama3" model is reachable at the default endpoint — TODO confirm.
model = Ollama(model="llama3")
def generate_text(model: Ollama, prompt: str) -> dict:
    """Stream a chat completion for *prompt* with the accumulated history.

    Args:
        model: Ollama client used to stream the chat completion.
        prompt: User input; an empty string short-circuits with a hint.

    Returns:
        dict with keys ``response`` (str) and ``duration`` (seconds, float),
        matching the ``Generate`` response model.
    """
    # Guard clause: nothing to send.
    if prompt == "":
        return {
            "response": "Please provide a prompt.",
            # Numeric, not str(0): the Generate model declares duration: float.
            "duration": 0,
        }

    # Record the user turn in the shared history.
    chat_history.append({
        "role": "user",
        "content": prompt,
    })

    messages = [ChatMessage(role=msg["role"], content=msg["content"]) for msg in chat_history]

    start_time = time.time()
    response_gen = model.stream_chat(messages)
    full_response = ""

    try:
        for response_chunk in response_gen:
            full_response += response_chunk.delta
    except Exception as e:
        # Roll back the dangling user turn so a failed call does not poison
        # the context of subsequent requests.
        chat_history.pop()
        return {
            "response": f"Error: {str(e)}",
            "duration": 0,
        }

    duration = time.time() - start_time

    chat_history.append({
        "role": "assistant",
        "content": full_response,
    })

    return {
        "response": full_response,
        # Return a float (rounded to 2 decimals) rather than a formatted
        # string, consistent with the other return paths and Generate.duration.
        "duration": round(duration, 2),
    }
@app.get("/")
async def root():
    """Health-check endpoint confirming the API is up."""
    greeting = {"message": "Hello World"}
    return greeting
@app.post("/api/generate", summary="Generate text from prompt", tags=["Generate"], response_model=Generate)
def inference(input_prompt: str):
    """HTTP entry point: delegate prompt handling to generate_text()."""
    result = generate_text(model, input_prompt)
    return result
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ fastapi>=0.100.0 # Newer FastAPI version
3
+ llama-index
4
+ pydantic>=2.0.0 # Newer Pydantic version
5
+ Pillow
6
+ uvicorn