Spaces: Running on Zero

Felipe Silva committed b62b49f (parent: ec6b9dd): "ajustes" (adjustments)

Files changed:
- app.py +11 -14
- rag_utils.py +2 -3
app.py CHANGED

@@ -17,8 +17,17 @@ print(zero.device) # <-- 'cpu' 🤔
 
 MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
 
-
-
+name_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #"Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"
+config.local_model_path = snapshot_download(
+    repo_id=name_model,
+    cache_dir="/root/.cache/huggingface",
+    local_files_only=False
+)
+config.local_emb_path = snapshot_download(
+    repo_id="sentence-transformers/all-MiniLM-L6-v2",
+    cache_dir="/root/.cache/huggingface",
+    local_files_only=False
+)
 
 def process_file(file):
     if file is None:

@@ -47,7 +56,6 @@ def ask_question(texto_extraido, question):
     resposta = rag_chain.run(question)
     return resposta
 
-@spaces.GPU
 def launch_app():
     with gr.Blocks() as demo:
         gr.Markdown("## ⚙️ Pergunte qualquer coisa para seu arquivo.")

@@ -70,15 +78,4 @@ def launch_app():
     demo.launch()
 
 if __name__ == "__main__":
-    name_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #"Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"
-    config.local_model_path = snapshot_download(
-        repo_id=name_model,
-        cache_dir="/root/.cache/huggingface",
-        local_files_only=False
-    )
-    config.local_emb_path = snapshot_download(
-        repo_id="sentence-transformers/all-MiniLM-L6-v2",
-        cache_dir="/root/.cache/huggingface",
-        local_files_only=False
-    )
     launch_app()
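Note on the app.py change: the snapshot_download calls move from the __main__ guard up to module scope, so both repos are fetched once at import time in the CPU process, before any request arrives; and @spaces.GPU comes off launch_app, since building the Gradio UI needs no GPU. The usual ZeroGPU pattern is to decorate only the functions that actually touch CUDA. A minimal sketch of that pattern, assuming the standard spaces package on a Hugging Face Space; generate is a hypothetical stand-in for this app's real inference path:

import spaces
from huggingface_hub import snapshot_download

# Module scope: the snapshot is downloaded once, at import time, on CPU.
local_model_path = snapshot_download(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    cache_dir="/root/.cache/huggingface",
    local_files_only=False,  # allow a network fetch on the first run
)

@spaces.GPU  # a GPU is attached only while this function runs
def generate(prompt: str) -> str:
    # Hypothetical body; the real app routes through rag_chain.run(question).
    ...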
rag_utils.py CHANGED

@@ -43,8 +43,8 @@ def get_model():
         raise ValueError("⚠️ config.local_model_path ainda não foi inicializado!")
     _model_instance = AutoModelForCausalLM.from_pretrained(
         config.local_model_path,
-        torch_dtype=
-        device_map="
+        torch_dtype=torch.float16,
+        device_map={"": "cuda"},
         trust_remote_code=True
     )

@@ -52,7 +52,6 @@ def get_model():
 
     # _model_instance.to(device)
 
-@spaces.GPU
 def get_tokenizer():
     global _tokenizer
     if _tokenizer is None:
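Note on the rag_utils.py change: device_map={"": "cuda"} assigns the root module (the empty-string key stands for the whole model) to the GPU instead of letting accelerate pick a layout, torch_dtype=torch.float16 loads the weights in half precision, and @spaces.GPU is dropped from get_tokenizer, which is CPU-only work. A self-contained sketch of that load, reusing the TinyLlama checkpoint from the diff but loading it by repo id here rather than through config.local_model_path:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half-precision weights: ~2 bytes per parameter
    device_map={"": "cuda"},    # "" = root module, i.e. the entire model on GPU 0
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)  # tokenization stays on CPU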