import os

import streamlit as st
import torch
from transformers import pipeline, AutoTokenizer

# --- Set Page Config FIRST ---
st.set_page_config(layout="wide")

# --- Configuration ---
# MODEL_NAME = "AdaptLLM/finance-LLM"  # Old model
MODEL_NAME = "WiroAI/WiroAI-Finance-Qwen-1.5B"  # New, smaller model
HF_TOKEN = os.environ.get("HF_TOKEN")
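# HF_TOKEN is read from the environment. On a Hugging Face Space that means
# adding it under Settings -> Variables and secrets; locally, an equivalent
# (assumed) setup would be `export HF_TOKEN=hf_...`. It is only needed for
# gated or private models.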

# --- Model Loading (cached by Streamlit for efficiency) ---
@st.cache_resource  # Without this decorator the model would reload on every rerun
def load_resources():
    """Loads the tokenizer and the text-generation pipeline."""
    if not HF_TOKEN:
        st.warning("HF_TOKEN secret not found. Ensure the model is public or add the token to the Space secrets.")
    try:
        st.info(f"Loading tokenizer for {MODEL_NAME}...")
        # `token` replaces the deprecated `use_auth_token`; token=None is fine for public models
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
        st.success("Tokenizer loaded.")

        # Determine device: use the GPU if available, otherwise the CPU.
        # device_map="auto" might be problematic on CPU-only Spaces, so start
        # with it and fall back to an explicit device if needed.
        device_map_setting = "auto"
        # device = 0 if torch.cuda.is_available() else -1  # Alternative: explicit device

        st.info(f"Loading model {MODEL_NAME}... (using {device_map_setting}) This might take a while.")
        generator = pipeline(
            "text-generation",
            model=MODEL_NAME,
            tokenizer=tokenizer,  # Pass the loaded tokenizer
            model_kwargs={"torch_dtype": torch.bfloat16},  # bfloat16 as per the model card
            device_map=device_map_setting,
            # device=device,  # Use this if device_map causes issues
            token=HF_TOKEN,  # Match the tokenizer: the model download may also need the token
            trust_remote_code=True,
        )
        st.success(f"Model {MODEL_NAME} loaded successfully!")
        return generator, tokenizer  # Return both
    except Exception as e:
        st.error(f"Error loading model/tokenizer: {e}", icon="🔥")
        st.error("Check memory limits, token access, or try removing device_map='auto'.")
        st.stop()
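
# If device_map="auto" fails on a CPU-only Space, a plausible fallback (an
# untested sketch, not part of the original app) is to pin the pipeline to
# the CPU and use float32, since bfloat16 can be slow or unsupported on CPU:
#   generator = pipeline("text-generation", model=MODEL_NAME, tokenizer=tokenizer,
#                        model_kwargs={"torch_dtype": torch.float32}, device=-1)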

# --- Load Resources ---
generator, tokenizer = load_resources()

# --- Streamlit App UI ---
st.title("💰 FinBuddy Assistant")
st.caption(f"Model: {MODEL_NAME}")

if "messages" not in st.session_state:
    # Seed the history with the system message from the model card example
    st.session_state.messages = [
        {"role": "system", "content": "You are a finance chatbot developed by Wiro AI"}
    ]

# Display past chat messages (excluding the system message)
for message in st.session_state.messages:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

# Get user input
if prompt := st.chat_input("Ask a question about finance..."):
    # Add the user prompt to state and display it
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate the assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        message_placeholder.markdown("Thinking... ⏳")

        # --- Prepare the prompt for the model (use the message history) ---
        # Use the messages stored in session state (includes the system prompt)
        messages_for_api = st.session_state.messages

        # --- Define terminators as per the model card ---
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>"),  # Qwen2-style chat models end turns with <|im_end|>
        ]
        # Drop entries where the token lookup failed (None, or a list if conversion misbehaves)
        terminators = [term for term in terminators if term is not None and not isinstance(term, list)]

        try:
            # Generate the response using the pipeline
            outputs = generator(
                messages_for_api,  # Pass the list of chat messages
                max_new_tokens=512,
                eos_token_id=terminators,
                pad_token_id=tokenizer.eos_token_id,  # Use EOS for padding
                do_sample=True,
                temperature=0.7,  # Adjusted slightly from the model card example
                top_p=0.95,  # Common sampling parameter
                # top_k=50,  # Optional parameter
            )
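
            # For reference, the expected shape when a chat-message list is
            # passed in looks like (illustrative, not captured from a real run):
            #   [{"generated_text": [{"role": "system", "content": "..."},
            #                        {"role": "user", "content": "..."},
            #                        {"role": "assistant", "content": "..."}]}]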
            # --- Extract the response ---
            # The output is a list containing a dict whose 'generated_text' is
            # itself the list of message dicts, ending with the assistant's reply.
            if (outputs
                    and isinstance(outputs, list)
                    and isinstance(outputs[0], dict)
                    and isinstance(outputs[0].get('generated_text'), list)
                    and len(outputs[0]['generated_text']) > 0):
                # The last message in the generated list should be the assistant's reply
                last_message = outputs[0]['generated_text'][-1]
                if isinstance(last_message, dict) and last_message.get('role') == 'assistant':
                    assistant_response = last_message.get('content', "").strip()
                else:
                    # Fallback if the format is unexpected: stringify the last element
                    assistant_response = str(last_message).strip()
                if not assistant_response:
                    assistant_response = "I generated an empty response."
            else:
                print("Unexpected output format:", outputs)  # Log for debugging
                assistant_response = "Sorry, I couldn't parse the response format."

            message_placeholder.markdown(assistant_response)
            st.session_state.messages.append({"role": "assistant", "content": assistant_response})
        except Exception as e:
            error_message = f"Error during text generation: {e}"
            st.error(error_message, icon="🔥")
            message_placeholder.markdown("Sorry, an error occurred while generating the response.")
            st.session_state.messages.append({"role": "assistant", "content": f"[Error: {e}]"})
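
# --- Running the app (assumed setup, not part of the original Space files) ---
# A minimal requirements.txt for this script would likely need:
#   streamlit
#   transformers
#   torch
#   accelerate   # required by device_map="auto"
# and the app would start with:
#   streamlit run app.py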