Spaces:
Sleeping
Sleeping
Ani07-05
committed on
Commit
·
7b9bc7a
1
Parent(s):
92cd1f8
Switch to WiroAI-Finance-Qwen-1.5B model
Browse files
app.py
CHANGED
|
@@ -1,121 +1,139 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
-
from transformers import pipeline
|
| 4 |
-
import torch
|
| 5 |
|
| 6 |
# --- Set Page Config FIRST ---
|
| 7 |
-
st.set_page_config(layout="wide")
|
| 8 |
|
| 9 |
# --- Configuration ---
|
| 10 |
-
MODEL_NAME = "AdaptLLM/finance-LLM"
|
| 11 |
-
|
| 12 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 13 |
|
| 14 |
# --- Model Loading (Cached by Streamlit for efficiency) ---
|
| 15 |
-
@st.cache_resource
|
| 16 |
-
def
|
| 17 |
-
"""Loads the text generation pipeline."""
|
| 18 |
if not HF_TOKEN:
|
| 19 |
st.warning("HF_TOKEN secret not found. Ensure the model is public or add the token to secrets.")
|
| 20 |
-
# Decide if you want to stop or proceed cautiously
|
| 21 |
-
# st.stop() # Uncomment this line to halt execution if token is strictly required
|
| 22 |
|
| 23 |
try:
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
generator = pipeline(
|
| 31 |
"text-generation",
|
| 32 |
model=MODEL_NAME,
|
| 33 |
-
tokenizer=
|
| 34 |
-
torch_dtype
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
)
|
| 38 |
st.success(f"Model {MODEL_NAME} loaded successfully!")
|
| 39 |
-
return generator
|
| 40 |
except Exception as e:
|
| 41 |
-
st.error(f"Error loading model
|
| 42 |
-
st.error("
|
| 43 |
-
st.stop()
|
| 44 |
|
| 45 |
-
# --- Load
|
| 46 |
-
generator =
|
| 47 |
|
| 48 |
# --- Streamlit App UI ---
|
| 49 |
st.title("π° FinBuddy Assistant")
|
| 50 |
-
st.caption("
|
| 51 |
|
| 52 |
-
# Initialize chat history in session state if it doesn't exist
|
| 53 |
if "messages" not in st.session_state:
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
# Display past chat messages
|
| 57 |
for message in st.session_state.messages:
|
| 58 |
-
|
| 59 |
-
st.
|
|
|
|
| 60 |
|
| 61 |
-
# Get user input
|
| 62 |
if prompt := st.chat_input("Ask a question about finance..."):
|
| 63 |
-
# Add user
|
| 64 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 65 |
with st.chat_message("user"):
|
| 66 |
st.markdown(prompt)
|
| 67 |
|
| 68 |
# Generate assistant response
|
| 69 |
with st.chat_message("assistant"):
|
| 70 |
-
message_placeholder = st.empty()
|
| 71 |
-
message_placeholder.markdown("Thinking...β³")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
# ---
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
try:
|
| 79 |
# Generate response using the pipeline
|
| 80 |
outputs = generator(
|
| 81 |
-
|
| 82 |
-
max_new_tokens=512,
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
)
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
else:
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
-
# Handle cases where the response might be empty after cleaning
|
| 105 |
if not assistant_response:
|
| 106 |
-
|
| 107 |
-
|
| 108 |
else:
|
| 109 |
-
|
|
|
|
| 110 |
|
| 111 |
-
# Display the final response
|
| 112 |
message_placeholder.markdown(assistant_response)
|
| 113 |
-
# Add the final assistant response to session state
|
| 114 |
st.session_state.messages.append({"role": "assistant", "content": assistant_response})
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
error_message = f"Error during text generation: {e}"
|
| 118 |
st.error(error_message, icon="π₯")
|
| 119 |
-
message_placeholder.markdown("Sorry, an error occurred
|
| 120 |
-
# Add error indication to history
|
| 121 |
st.session_state.messages.append({"role": "assistant", "content": f"[Error: {e}]"})
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
+
from transformers import pipeline, AutoTokenizer # Added AutoTokenizer
|
| 4 |
+
import torch
|
| 5 |
|
| 6 |
# --- Set Page Config FIRST ---
st.set_page_config(layout="wide")

# --- Configuration ---
# MODEL_NAME = "AdaptLLM/finance-LLM"  # Old model
MODEL_NAME = "WiroAI/WiroAI-Finance-Qwen-1.5B"  # New smaller model
HF_TOKEN = os.environ.get("HF_TOKEN")

# --- Model Loading (Cached by Streamlit for efficiency) ---
@st.cache_resource
def load_resources():
    """Load the tokenizer and the text-generation pipeline.

    Returns:
        tuple: ``(generator, tokenizer)`` — the transformers text-generation
        pipeline and the tokenizer it was built with.

    On any loading failure the error is surfaced in the UI and the Streamlit
    script is halted via ``st.stop()`` (so this never returns on failure).
    """
    if not HF_TOKEN:
        st.warning("HF_TOKEN secret not found. Ensure the model is public or add the token to secrets.")

    try:
        st.info(f"Loading tokenizer for {MODEL_NAME}...")
        # `token=` replaces the deprecated `use_auth_token=` argument,
        # which has been removed in recent transformers releases.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN if HF_TOKEN else None)
        st.success("Tokenizer loaded.")

        # device_map="auto" lets accelerate place the model automatically.
        # NOTE(review): this may be problematic on CPU-only Spaces — fall
        # back to an explicit device (0 for GPU, -1 for CPU) if it misbehaves.
        device_map_setting = "auto"

        st.info(f"Loading model {MODEL_NAME}... (Using {device_map_setting}) This might take a while.")
        generator = pipeline(
            "text-generation",
            model=MODEL_NAME,
            tokenizer=tokenizer,  # reuse the tokenizer loaded above
            model_kwargs={"torch_dtype": torch.bfloat16},  # bfloat16 as per model card
            device_map=device_map_setting,
            # device=device  # use an explicit device if device_map causes issues
            trust_remote_code=True,
        )
        st.success(f"Model {MODEL_NAME} loaded successfully!")
        return generator, tokenizer  # Return both
    except Exception as e:
        st.error(f"Error loading model/tokenizer: {e}", icon="🔥")
        st.error("Check memory limits, token access, or try removing device_map='auto'.")
        st.stop()
|
| 49 |
|
| 50 |
+
# --- Load Resources ---
generator, tokenizer = load_resources()

# --- Streamlit App UI ---
st.title("💰 FinBuddy Assistant")
st.caption(f"Model: {MODEL_NAME}")

if "messages" not in st.session_state:
    # Seed the history with the system prompt (as per model card example).
    st.session_state.messages = [
        {"role": "system", "content": "You are a finance chatbot developed by Wiro AI"}
    ]

# Display past chat messages (the system message is never rendered).
for message in st.session_state.messages:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

# Get user input
if prompt := st.chat_input("Ask a question about finance..."):
    # Add user prompt to state and display it immediately.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        message_placeholder.markdown("Thinking...⏳")

        # Pass the full stored history (including the system prompt) so the
        # model sees the whole conversation.
        messages_for_api = st.session_state.messages

        # --- Define terminators ---
        # Qwen chat models end a turn with <|im_end|>; <|end_of_text|> is
        # Llama-3's end token and is kept only as a compatibility candidate.
        # convert_tokens_to_ids returns the unk id (or None) for tokens
        # missing from the vocab, so filter those out to avoid truncating
        # generation on every unknown token.
        unk_id = tokenizer.unk_token_id
        candidate_end_tokens = ["<|im_end|>", "<|end_of_text|>"]
        terminators = [tokenizer.eos_token_id] + [
            tokenizer.convert_tokens_to_ids(tok) for tok in candidate_end_tokens
        ]
        terminators = [
            term for term in terminators
            if term is not None and not isinstance(term, list) and term != unk_id
        ]

        try:
            # Generate response using the pipeline; chat pipelines accept the
            # message list directly.
            outputs = generator(
                messages_for_api,
                max_new_tokens=512,
                eos_token_id=terminators,
                pad_token_id=tokenizer.eos_token_id,  # use EOS for padding
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                # top_k=50  # optional parameter
            )

            # --- Extract response ---
            # Expected shape: [{'generated_text': [<message dict>, ...]}],
            # where the last message dict is the assistant's reply.
            if (outputs and
                    isinstance(outputs, list) and
                    len(outputs) > 0 and
                    isinstance(outputs[0], dict) and
                    'generated_text' in outputs[0] and
                    isinstance(outputs[0]['generated_text'], list) and
                    len(outputs[0]['generated_text']) > 0):

                last_message = outputs[0]['generated_text'][-1]
                if isinstance(last_message, dict) and last_message.get('role') == 'assistant':
                    assistant_response = last_message.get('content', "").strip()
                else:
                    # Fallback if the last element is not an assistant dict.
                    assistant_response = str(outputs[0]['generated_text'][-1]).strip()

                # Handle cases where the response is empty after cleaning.
                if not assistant_response:
                    assistant_response = "I generated an empty response."
            else:
                print("Unexpected output format:", outputs)  # log for debugging
                assistant_response = "Sorry, I couldn't parse the response format."

            # Display the final response and persist it to the history.
            message_placeholder.markdown(assistant_response)
            st.session_state.messages.append({"role": "assistant", "content": assistant_response})

        except Exception as e:
            error_message = f"Error during text generation: {e}"
            st.error(error_message, icon="🔥")
            message_placeholder.markdown("Sorry, an error occurred generating the response.")
            # Add error indication to history so the failure is visible later.
            st.session_state.messages.append({"role": "assistant", "content": f"[Error: {e}]"})
|