Set max_new_tokens to 2000
#1
by GeorgiosIoannouCoder - opened
app.py CHANGED

```diff
@@ -202,17 +202,17 @@ class RAGQuestionAnswering:
         @st.cache_resource
         def load_pipe():
             print(f"Is CUDA available: {torch.cuda.is_available()}")
-            # True
             print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-
-
-            print("torch.cuda.
-
-
+            print(f"torch.cuda.current_device(): {torch.cuda.current_device()}")
+            print(f"torch.cuda.device(0): {torch.cuda.device(0)}")
+            print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")
+
+            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_new_tokens=2000)
 
-            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_length=1000)
             return(pipe)
+
         self.pipe = load_pipe()
+
         return None
 
 
@@ -266,14 +266,16 @@ class RAGQuestionAnswering:
         """
 
         formatted_prompt = self.prompt.format(**input_dict)
+
         messages=[
             {"role": "system", "content": formatted_prompt},
             {"role": "user", "content": input_dict["question"]},
         ]
-
+
         response = self.pipe(messages)
         print("#"*88)
         print(response, type(response))
+
         return str(response)
 
     def setup_rag_chain(self) -> None:
```
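The substantive change in `load_pipe()` is replacing `max_length=1000` with `max_new_tokens=2000` on the pipeline call. In transformers, `max_length` bounds the prompt plus the generated tokens, while `max_new_tokens` budgets only the newly generated ones, so a long RAG prompt no longer eats into the answer's token budget. A minimal sketch of the distinction, assuming a CUDA GPU and using an illustrative prompt rather than the Space's real one:

```python
# Minimal sketch, not the Space's app.py: shows why max_new_tokens is the safer
# length control when the prompt itself is long (as with retrieved RAG context).
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device="cuda:0",          # assumes a CUDA-capable GPU is present
    max_new_tokens=2000,      # caps only the tokens the model generates
)

# With max_length=1000 the limit would also count the prompt tokens, so a
# 900-token RAG prompt would leave roughly 100 tokens for the answer.
out = pipe("Explain retrieval-augmented generation in two sentences.")
print(out[0]["generated_text"])
```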
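`load_pipe()` stays wrapped in `@st.cache_resource`, so the Qwen pipeline is built once per Streamlit server process and reused across script reruns instead of being reloaded on every user interaction. A stripped-down sketch of that caching pattern, assuming Streamlit 1.18+ where `st.cache_resource` is available:

```python
# Stripped-down sketch of the cached loader pattern; the real method also logs
# CUDA information before building the pipeline.
import streamlit as st
from transformers import pipeline

@st.cache_resource
def load_pipe():
    # Executed once per process; later Streamlit reruns return the cached object.
    return pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-1.5B-Instruct",
        device="cuda:0",
        max_new_tokens=2000,
    )

pipe = load_pipe()  # fast after the first call, no model reload per rerun
```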
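In the second hunk the formatted RAG prompt goes in as a chat conversation (a system turn carrying the formatted prompt, a user turn carrying the question), and the method returns `str(response)`, i.e. the stringified pipeline output. As a hedged sketch with illustrative message contents (the exact output layout depends on the transformers version), the assistant's text can typically be pulled out of that structure like this, reusing the `pipe` from the sketches above:

```python
# Hedged sketch of reading chat-style pipeline output; the indexing below is an
# assumption about recent transformers versions, not something the diff shows.
messages = [
    {"role": "system", "content": "Answer using only the provided context."},
    {"role": "user", "content": "What does section 2 cover?"},
]
response = pipe(messages)

# Typically: [{"generated_text": [system_msg, user_msg, assistant_msg]}],
# where each message is a {"role": ..., "content": ...} dict.
assistant_msg = response[0]["generated_text"][-1]
print(assistant_msg["content"])
```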