Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -7,14 +7,24 @@ import torch
|
|
| 7 |
import os
|
| 8 |
import asyncio
|
| 9 |
|
| 10 |
-
# β
|
| 11 |
-
|
| 12 |
-
os.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# β
Load model and tokenizer
|
| 15 |
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# β
Use CUDA if available
|
| 20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -64,4 +74,4 @@ async def generate_response_chunks(prompt: str):
|
|
| 64 |
# β
API route
|
| 65 |
@app.post("/ask")
|
| 66 |
async def ask(question: Question):
|
| 67 |
-
return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
|
|
|
|
| 7 |
import os
|
| 8 |
import asyncio
|
| 9 |
|
| 10 |
+
# β
Set all cache directories to a writable location
|
| 11 |
+
cache_dir = "/tmp/hf_home"
|
| 12 |
+
os.environ["HF_HOME"] = cache_dir
|
| 13 |
+
os.environ["TRANSFORMERS_CACHE"] = cache_dir
|
| 14 |
+
os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
|
| 15 |
+
|
| 16 |
+
# β
Create cache directory with proper permissions
|
| 17 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 18 |
+
os.chmod(cache_dir, 0o777) # Make writable by all
|
| 19 |
|
| 20 |
# β
Load model and tokenizer
|
| 21 |
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 22 |
+
try:
|
| 23 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
|
| 24 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading model: {e}")
|
| 27 |
+
raise
|
| 28 |
|
| 29 |
# β
Use CUDA if available
|
| 30 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 74 |
# β
API route
|
| 75 |
@app.post("/ask")
|
| 76 |
async def ask(question: Question):
|
| 77 |
+
return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
|