Update chatbot.py
chatbot.py  CHANGED  (+15 -36)
@@ -5,23 +5,26 @@ import torch
 # ================= CACHE THE MODEL =================
 @st.cache_resource
 def load_model():
-    model_id = "ammoncoder123/IPTchatbotModel1-1.7B"  # ←
+    model_id = "ammoncoder123/IPTchatbotModel1-1.7B"  # ← YOUR REAL REPO (copy-paste this exactly!)
 
-
+    st.write("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    st.write("Loading model (this may take a few minutes the first time)...")
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.float16
     )
 
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         quantization_config=quantization_config,
-        device_map="auto",
+        device_map="auto",  # GPU if available, else CPU
         torch_dtype=torch.float16,
-        trust_remote_code=True
+        trust_remote_code=True  # Safe for most models
     )
 
+    st.write("Model loaded successfully!")
     return pipeline(
         "text-generation",
         model=model,
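Review note: load_in_4bit=True goes through the bitsandbytes CUDA kernels, so this load path assumes a GPU is available; on a CPU-only Space the 4-bit load fails at startup. Also, trust_remote_code=True executes code from the model repo, so it is only worth enabling when the architecture actually requires it. A minimal sketch of a guarded fallback, assuming it is acceptable to run the model unquantized in float32 on CPU (build_model_kwargs is a hypothetical helper, not part of this commit):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    def build_model_kwargs():
        # Use 4-bit quantization only when a CUDA device is present.
        if torch.cuda.is_available():
            return {
                "quantization_config": BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.float16,
                ),
                "device_map": "auto",
            }
        # CPU fallback: skip bitsandbytes entirely and load float32 weights.
        return {"torch_dtype": torch.float32}

    model = AutoModelForCausalLM.from_pretrained(model_id, **build_model_kwargs())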
@@ -32,60 +35,36 @@ def load_model():
         top_p=0.9
     )
 
-# Load model once (this will run on first use)
 pipe = load_model()
 
 # ==================== CHAT INTERFACE ====================
-st.title("
+st.title("My 1.7B Fine-Tuned IPT Chatbot")
 
-
-st.info("⚠️ This is a small fine-tuned model (1.7B parameters). Answers may contain inaccuracies. Always verify important information.")
+st.info("⚠️ Small fine-tuned model (1.7B). Answers may vary — verify important info.")
 
-# Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
-# Display chat history
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-
-if prompt := st.chat_input("Ask me about IPT, ICT, or anything else..."):
-    # Add user message
+if prompt := st.chat_input("Ask about IPT, ICT, or anything..."):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
         st.markdown(prompt)
 
-    # Generate response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
-
-            chat_messages = [
-                {"role": "user", "content": prompt}
-            ]
-
-            outputs = pipe(
-                chat_messages,
-                max_new_tokens=300,
-                temperature=0.7,
-                do_sample=True,
-                top_p=0.9
-            )
-
-            # Extract generated text
+            chat_messages = [{"role": "user", "content": prompt}]
+            outputs = pipe(chat_messages, max_new_tokens=300, temperature=0.7, do_sample=True, top_p=0.9)
             response = outputs[0]["generated_text"]
-
-            # Clean up echoed prompt
-            if isinstance(response, str) and response.startswith(prompt):
+            if response.startswith(prompt):
                 response = response[len(prompt):].strip()
-
             st.markdown(response)
 
-    # Save assistant response
     st.session_state.messages.append({"role": "assistant", "content": response})
 
-
-if st.button("Clear Conversation"):
+if st.button("Clear Chat"):
     st.session_state.messages = []
     st.rerun()
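Review note: the new one-line extraction drops the old isinstance guard. When the pipeline is called with a list of chat messages, recent transformers versions return generated_text as the full conversation (a list of role/content dicts) rather than a plain string, so response.startswith(prompt) can raise AttributeError. A type-aware extraction, offered as a hedged sketch rather than as this commit's behavior:

    raw = outputs[0]["generated_text"]
    if isinstance(raw, list):
        # Chat format: the assistant reply is the last message in the returned conversation.
        response = raw[-1]["content"]
    elif isinstance(raw, str) and raw.startswith(prompt):
        # Plain-string format: strip the echoed prompt, as the removed code did.
        response = raw[len(prompt):].strip()
    else:
        response = raw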
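Review note: chat_messages is rebuilt from only the latest prompt, so the model never sees earlier turns and each reply is generated without conversation memory. Because the user message is appended to st.session_state.messages before generation, passing the stored history instead would be a one-line follow-up (not part of this commit):

    # The stored history already includes the just-appended user prompt.
    chat_messages = list(st.session_state.messages)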