Spaces:

zhtet
/

RegBotBeta

Sleeping

App Files Files Community

Zwea Htet committed on Aug 6, 2023

Commit

991fc6b

1 Parent(s): 215cfd3

fixed ui

Browse files

Files changed (1) hide show

models/llamaCustom.py +29 -19

models/llamaCustom.py CHANGED Viewed

@@ -36,25 +36,32 @@ NUM_OUTPUT = 525
 # set maximum chunk overlap
 CHUNK_OVERLAP_RATION = 0.2
-llm_model_name = "bigscience/bloom-560m"
-tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
-model = AutoModelForCausalLM.from_pretrained(llm_model_name, config="T5Config")
-model_pipeline = pipeline(
-    model=model,
-    tokenizer=tokenizer,
-    task="text-generation",
-    # device=0, # GPU device number
-    # max_length=512,
-    do_sample=True,
-    top_p=0.95,
-    top_k=50,
-    temperature=0.7,
-)
 class CustomLLM(LLM):
-    pipeline = model_pipeline
     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
         prompt_length = len(prompt)
@@ -65,17 +72,19 @@ class CustomLLM(LLM):
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"name_of_model": llm_model_name}
     @property
     def _llm_type(self) -> str:
         return "custom"
 class LlamaCustom:
     def __init__(self, model_name: str) -> None:
         self.vector_index = self.initialize_index(model_name=model_name)
-    def initialize_index(self, model_name: str):
         index_name = model_name.split("/")[-1]
         file_path = f"./vectorStores/{index_name}"
@@ -97,7 +106,8 @@ class LlamaCustom:
                 num_output=NUM_OUTPUT,
                 chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
             )
-            llm_predictor = LLMPredictor(llm=CustomLLM())
             service_context = ServiceContext.from_defaults(
                 llm_predictor=llm_predictor, prompt_helper=prompt_helper
             )

 # set maximum chunk overlap
 CHUNK_OVERLAP_RATION = 0.2
+@st.cache_resource
+def load_model(mode_name: str):
+    # llm_model_name = "bigscience/bloom-560m"
+    tokenizer = AutoTokenizer.from_pretrained(mode_name)
+    model = AutoModelForCausalLM.from_pretrained(mode_name, config="T5Config")
+    pipe = pipeline(
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        # device=0, # GPU device number
+        # max_length=512,
+        do_sample=True,
+        top_p=0.95,
+        top_k=50,
+        temperature=0.7,
+    )
+    return pipe
 class CustomLLM(LLM):
+    def __init__(self, model_name: str):
+        self.llm_model_name = model_name
+        self.pipeline = load_model(mode_name=model_name)
     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
         prompt_length = len(prompt)
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
+        return {"name_of_model": self.llm_model_name}
     @property
     def _llm_type(self) -> str:
         return "custom"
 class LlamaCustom:
     def __init__(self, model_name: str) -> None:
         self.vector_index = self.initialize_index(model_name=model_name)
+    @st.cache_resource
+    def initialize_index(_self, model_name: str):
         index_name = model_name.split("/")[-1]
         file_path = f"./vectorStores/{index_name}"
                 num_output=NUM_OUTPUT,
                 chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
             )
+            llm_predictor = LLMPredictor(llm=CustomLLM(model_name=model_name))
             service_context = ServiceContext.from_defaults(
                 llm_predictor=llm_predictor, prompt_helper=prompt_helper
             )