Rajan Sharma committed
Commit 8aebe10 · verified · 1 Parent(s): f027b47

Update local_llm.py

Files changed (1):
local_llm.py (+12 -22)
local_llm.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional, List
+from typing import Optional
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from settings import OPEN_LLM_CANDIDATES, LOCAL_MAX_NEW_TOKENS
@@ -6,7 +6,6 @@ from settings import OPEN_LLM_CANDIDATES, LOCAL_MAX_NEW_TOKENS
 class LocalLLM:
     def __init__(self):
         self.pipe = None
-        self.model_id = None
         self._load_any()
 
     def _load_any(self):
@@ -14,31 +13,22 @@ class LocalLLM:
             try:
                 tok = AutoTokenizer.from_pretrained(mid, trust_remote_code=True)
                 mdl = AutoModelForCausalLM.from_pretrained(
-                    mid, device_map="auto", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                    mid, device_map="auto",
+                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                     trust_remote_code=True
                 )
                 self.pipe = pipeline("text-generation", model=mdl, tokenizer=tok)
-                self.model_id = mid
                 return
             except Exception:
                 continue
-        self.pipe = None
 
     def chat(self, prompt: str) -> Optional[str]:
-        if not self.pipe:
-            return None
-        try:
-            out = self.pipe(
-                prompt,
-                max_new_tokens=LOCAL_MAX_NEW_TOKENS,
-                do_sample=True,
-                temperature=0.3,
-                top_p=0.9,
-                repetition_penalty=1.12,
-                eos_token_id=self.pipe.tokenizer.eos_token_id
-            )
-            text = out[0]["generated_text"]
-            # Return only the continuation if prompt is included
-            return text[len(prompt):].strip() if text.startswith(prompt) else text.strip()
-        except Exception:
-            return None
+        if not self.pipe: return None
+        out = self.pipe(
+            prompt, max_new_tokens=LOCAL_MAX_NEW_TOKENS,
+            do_sample=True, temperature=0.3, top_p=0.9, repetition_penalty=1.12,
+            eos_token_id=self.pipe.tokenizer.eos_token_id
+        )
+        text = out[0]["generated_text"]
+        return text[len(prompt):].strip() if text.startswith(prompt) else text.strip()
+
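
For context, a minimal usage sketch of LocalLLM as it stands after this commit. It assumes settings.py supplies OPEN_LLM_CANDIDATES (a list of Hugging Face model IDs tried in order) and LOCAL_MAX_NEW_TOKENS (an int), as the imports suggest; the prompt string is illustrative.

# Usage sketch (assumes settings.py defines OPEN_LLM_CANDIDATES and
# LOCAL_MAX_NEW_TOKENS; the prompt below is a hypothetical example).
from local_llm import LocalLLM

llm = LocalLLM()  # _load_any() tries each candidate model until one loads
reply = llm.chat("Summarize this repository in one sentence.")
if reply is None:
    # chat() returns None only when no candidate model could be loaded
    print("no local model available")
else:
    print(reply)

Note that with the try/except removed from chat(), generation errors (e.g. running out of GPU memory mid-generation) now propagate to the caller instead of surfacing as None, so callers that relied on the old swallow-everything behavior may want their own error handling.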