Spaces:

FelipeErmeson
/

projeto-rag

Sleeping

Felipe Silva committed on Sep 13, 2025

Commit

c3bd22b

1 Parent(s): 315fbb4

ajuste

Files changed (2) hide show

app.py CHANGED Viewed

@@ -46,6 +46,7 @@ def process_file(file):
         texto_extraido = "OCR não implementado neste exemplo."
     return texto_extraido or "Não foi possível extrair texto."
 def ask_question(texto_extraido, question):
     # RAG

         texto_extraido = "OCR não implementado neste exemplo."
     return texto_extraido or "Não foi possível extrair texto."
+@spaces.GPU
 def ask_question(texto_extraido, question):
     # RAG

rag_utils.py CHANGED Viewed

@@ -34,7 +34,7 @@ def get_embedding_model():
 # model_name = "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8" #"Qwen/Qwen2.5-7B-Instruct-AWQ" #"Qwen/Qwen2.5-7B-Instruct"
-@spaces.GPU
 def get_model():
     global _model_instance
     if _model_instance is None:
@@ -42,7 +42,7 @@ def get_model():
             raise ValueError("⚠️ config.local_model_path ainda não foi inicializado!")
         _model_instance = AutoModelForCausalLM.from_pretrained(
             config.local_model_path,
-            torch_dtype=torch.float16,
             device_map={"": "cuda"},
             trust_remote_code=True
         )
@@ -66,7 +66,7 @@ def create_split_doc(raw_text):
     return docs
-@spaces.GPU
 def store_docs(docs):
     embedding_model = get_embedding_model()
     vectorstore = FAISS.from_documents(docs, embedding_model)
@@ -90,7 +90,7 @@ Pergunta:
 )
     return prompt_template
-@spaces.GPU
 def create_rag_chain(vectorstore):
     pipe = pipeline(
         "text-generation",
@@ -98,8 +98,7 @@ def create_rag_chain(vectorstore):
         tokenizer=get_tokenizer(),
         max_new_tokens=512,
         temperature=0.1,
-        do_sample=False,
-        device=-1
     )
     # Adapta para LangChain

 # model_name = "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8" #"Qwen/Qwen2.5-7B-Instruct-AWQ" #"Qwen/Qwen2.5-7B-Instruct"
+# @spaces.GPU
 def get_model():
     global _model_instance
     if _model_instance is None:
             raise ValueError("⚠️ config.local_model_path ainda não foi inicializado!")
         _model_instance = AutoModelForCausalLM.from_pretrained(
             config.local_model_path,
+            dtype=torch.float16,
             device_map={"": "cuda"},
             trust_remote_code=True
         )
     return docs
+# @spaces.GPU
 def store_docs(docs):
     embedding_model = get_embedding_model()
     vectorstore = FAISS.from_documents(docs, embedding_model)
 )
     return prompt_template
+# @spaces.GPU
 def create_rag_chain(vectorstore):
     pipe = pipeline(
         "text-generation",
         tokenizer=get_tokenizer(),
         max_new_tokens=512,
         temperature=0.1,
+        do_sample=False
     )
     # Adapta para LangChain