Commit fd97c8c
Parent(s): 2b27faa

add 3B model

Files changed:
- app.py (+20 -9)
- utils/llama_utils.py (+5 -5)
app.py
CHANGED
@@ -35,6 +35,7 @@ st.markdown("""
 # ---------------------------------------
 base_path = "data/"
 base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+base_model_path_3B = "meta-llama/Llama-3.2-3B-Instruct"
 adapter_path = "./LLaMA-TOMMI-1.0/"
 
 st.title(":red[AI University] :gray[/] FEM")
@@ -115,12 +116,12 @@ with st.sidebar:
     # Choose the LLM model
     st.session_state.synthesis_model = st.selectbox(
         "Choose the LLM model",
-        ["LLaMA-3.2-11B", "gpt-4o-mini"],
+        ["LLaMA-3.2-3B","gpt-4o-mini"], # "LLaMA-3.2-11B",
         index=1,
         key='a2model'
     )
 
-    if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+    if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
         synthesis_do_sample = st.toggle("Enable Sampling", value=False, key='synthesis_sample')
 
         if synthesis_do_sample:
@@ -169,6 +170,14 @@ with col2:
         help=question_help
     )
 
+with st.spinner("Loading LLaMA-TOMMI-1.0-11B..."):
+    if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+        if 'tommi_model' not in st.session_state:
+            tommi_model, tommi_tokenizer = load_fine_tuned_model(adapter_path, base_model_path)
+            st.session_state.tommi_model = tommi_model
+            st.session_state.tommi_tokenizer = tommi_tokenizer
+
+
 with st.spinner("Loading LLaMA-3.2-11B..."):
     if "LLaMA-3.2-11B" in [st.session_state.expert_model, st.session_state.synthesis_model]:
         if 'llama_model' not in st.session_state:
@@ -176,12 +185,12 @@ with st.spinner("Loading LLaMA-3.2-11B..."):
            st.session_state.llama_model = llama_model
            st.session_state.llama_tokenizer = llama_tokenizer
 
-with st.spinner("Loading LLaMA-TOMMI-1.0-11B..."):
-    if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
-        if 'tommi_model' not in st.session_state:
-            tommi_model, tommi_tokenizer = load_fine_tuned_model(adapter_path, base_model_path)
-            st.session_state.tommi_model = tommi_model
-            st.session_state.tommi_tokenizer = tommi_tokenizer
+with st.spinner("Loading LLaMA-3.2-3B..."):
+    if "LLaMA-3.2-3B" in [st.session_state.expert_model, st.session_state.synthesis_model]:
+        if 'llama_model_3B' not in st.session_state:
+            llama_model_3B, llama_tokenizer_3B = load_base_model(base_model_path_3B)
+            st.session_state.llama_model_3B = llama_model_3B
+            st.session_state.llama_tokenizer_3B = llama_tokenizer_3B
 
 # Load YouTube and LaTeX data
 text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
@@ -264,6 +273,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
                model=model_,
                tokenizer=tokenizer_,
                messages=messages,
+               tokenizer_max_length=500,
                do_sample=expert_do_sample,
                temperature=expert_temperature if expert_do_sample else None,
                top_k=expert_top_k if expert_do_sample else None,
@@ -289,7 +299,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
        #-------------------------
        # synthesis responses
        #-------------------------
-       if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+       if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
            synthesis_prompt = f"""
            Question:
            {st.session_state.question}
@@ -311,6 +321,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
                model=st.session_state.llama_model,
                tokenizer=st.session_state.llama_tokenizer,
                messages=messages,
+               tokenizer_max_length=30000,
                do_sample=synthesis_do_sample,
                temperature=synthesis_temperature if synthesis_do_sample else None,
                top_k=synthesis_top_k if synthesis_do_sample else None,
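Note: the new 3B branch caches a second model/tokenizer pair under llama_model_3B / llama_tokenizer_3B via load_base_model(base_model_path_3B). load_base_model lives in utils/llama_utils.py and is not shown in this diff; the sketch below is only a plausible shape of such a loader, assuming a standard transformers causal-LM load (the repo's actual dtype, device, or quantization handling may differ).

# Plausible shape of load_base_model (defined in utils/llama_utils.py, not part
# of this diff). Assumes a standard Hugging Face transformers load.
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_base_model(model_path: str):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype="auto",   # keep the checkpoint's native precision
        device_map="auto",    # place weights on available GPU(s)/CPU
    )
    model.eval()              # inference only
    return model, tokenizer

# Usage mirrors the new call in app.py:
# llama_model_3B, llama_tokenizer_3B = load_base_model("meta-llama/Llama-3.2-3B-Instruct")

Keeping each pair in st.session_state means the weights load once per Streamlit session instead of on every rerun.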
utils/llama_utils.py
CHANGED
@@ -93,16 +93,16 @@ def generate_response(
     model: AutoModelForCausalLM,
     tokenizer: PreTrainedTokenizerFast,
     messages: list,
+    tokenizer_max_length: int = 500,
     do_sample: bool = False,
-    temperature: float = 0.
+    temperature: float = 0.1,
     top_k: int = 50,
     top_p: float = 0.95,
     num_beams: int = 1,
-    max_new_tokens: int =
+    max_new_tokens: int = 700
 ) -> str:
     """
     Runs inference on an LLM model.
-
     Args:
         model (AutoModelForCausalLM)
         tokenizer (PreTrainedTokenizerFast)
@@ -124,7 +124,7 @@ def generate_response(
     # Tokenize input
     inputs = tokenizer(
         input_text,
-        max_length=
+        max_length=tokenizer_max_length,
         truncation=True,
         return_tensors="pt"
     ).to(model.device)
@@ -158,4 +158,4 @@ def generate_response(
 
     response = re.sub(r'^\s*(?:answer\s*)+:?\s*', '', response, flags=re.IGNORECASE)
 
-    return response
+    return response
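With the new tokenizer_max_length parameter, the prompt-truncation limit is set per call instead of being hard-coded in the tokenizer call: app.py passes 500 tokens for the expert pass and 30000 for the synthesis pass. Below is a minimal call sketch against the updated signature; the chat-message format and the import path are assumptions (the diff only shows the parameter list), and nothing else about generation is implied beyond what the diff shows.

# Sketch of calling generate_response with the new tokenizer_max_length knob.
# Assumes utils/ is importable from the repo root, that load_base_model is
# exported from utils.llama_utils, and that `messages` uses the usual chat
# format app.py builds (not shown in this diff).
from utils.llama_utils import generate_response, load_base_model

model, tokenizer = load_base_model("meta-llama/Llama-3.2-3B-Instruct")

messages = [
    {"role": "system", "content": "You are a concise FEM teaching assistant."},
    {"role": "user", "content": "What does the global stiffness matrix represent?"},
]

answer = generate_response(
    model=model,
    tokenizer=tokenizer,
    messages=messages,
    tokenizer_max_length=500,   # inputs longer than this are truncated (truncation=True)
    do_sample=False,            # greedy decoding; temperature/top_k/top_p then unused
    max_new_tokens=700,         # matches the new default in this commit
)
print(answer)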