Spaces:

FelipeErmeson
/

projeto-rag

Running on Zero

Felipe Silva committed on Sep 13

Commit

b62b49f

1 Parent(s): ec6b9dd

ajustes

Files changed (2) hide show

app.py CHANGED Viewed

@@ -17,8 +17,17 @@ print(zero.device) # <-- 'cpu' 🤔
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
-local_model_path = None
-local_emb_path = None
 def process_file(file):
     if file is None:
@@ -47,7 +56,6 @@ def ask_question(texto_extraido, question):
     resposta = rag_chain.run(question)
     return resposta
-@spaces.GPU
 def launch_app():
     with gr.Blocks() as demo:
         gr.Markdown("## ⚙️ Pergunte qualquer coisa para seu arquivo.")
@@ -70,15 +78,4 @@ def launch_app():
     demo.launch()
 if __name__ == "__main__":
-    name_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #"Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"
-    config.local_model_path = snapshot_download(
-        repo_id=name_model,
-        cache_dir="/root/.cache/huggingface",
-        local_files_only=False
-    )
-    config.local_emb_path = snapshot_download(
-        repo_id="sentence-transformers/all-MiniLM-L6-v2",
-        cache_dir="/root/.cache/huggingface",
-        local_files_only=False
-    )
     launch_app()

 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+name_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #"Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"
+config.local_model_path = snapshot_download(
+    repo_id=name_model,
+    cache_dir="/root/.cache/huggingface",
+    local_files_only=False
+)
+config.local_emb_path = snapshot_download(
+    repo_id="sentence-transformers/all-MiniLM-L6-v2",
+    cache_dir="/root/.cache/huggingface",
+    local_files_only=False
+)
 def process_file(file):
     if file is None:
     resposta = rag_chain.run(question)
     return resposta
 def launch_app():
     with gr.Blocks() as demo:
         gr.Markdown("## ⚙️ Pergunte qualquer coisa para seu arquivo.")
     demo.launch()
 if __name__ == "__main__":
     launch_app()

rag_utils.py CHANGED Viewed

@@ -43,8 +43,8 @@ def get_model():
             raise ValueError("⚠️ config.local_model_path ainda não foi inicializado!")
         _model_instance = AutoModelForCausalLM.from_pretrained(
             config.local_model_path,
-            torch_dtype="auto",
-            device_map="auto",
             trust_remote_code=True
         )
@@ -52,7 +52,6 @@ def get_model():
 # _model_instance.to(device)
-@spaces.GPU
 def get_tokenizer():
     global _tokenizer
     if _tokenizer is None:

             raise ValueError("⚠️ config.local_model_path ainda não foi inicializado!")
         _model_instance = AutoModelForCausalLM.from_pretrained(
             config.local_model_path,
+            torch_dtype=torch.float16,
+            device_map={"": "cuda"},
             trust_remote_code=True
         )
 # _model_instance.to(device)
 def get_tokenizer():
     global _tokenizer
     if _tokenizer is None: