Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -161,19 +161,30 @@
|
|
| 161 |
# # gr.load("models/Bhaskar2611/Capstone").launch()
|
| 162 |
|
| 163 |
import os
|
| 164 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 165 |
import gradio as gr
|
| 166 |
|
| 167 |
-
# Load your Hugging Face token
|
| 168 |
hf_token = os.environ.get("HF_TOKEN")
|
| 169 |
|
| 170 |
-
|
|
|
|
| 171 |
|
| 172 |
-
# Load tokenizer
|
| 173 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
|
| 174 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
|
| 175 |
|
| 176 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
SKIN_ASSISTANT_PROMPT = (
|
| 178 |
"You are a helpful assistant specialized in skin diseases and dermatology. "
|
| 179 |
"Provide accurate, concise, and helpful advice about skin conditions, symptoms, "
|
|
@@ -181,17 +192,18 @@ SKIN_ASSISTANT_PROMPT = (
|
|
| 181 |
)
|
| 182 |
|
| 183 |
def generate_response(user_input):
|
| 184 |
-
# Combine the assistant prompt + user input
|
| 185 |
prompt = SKIN_ASSISTANT_PROMPT + user_input
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 190 |
-
|
| 191 |
-
# Remove the assistant prompt part from the response (if it appears)
|
| 192 |
-
if response.startswith(SKIN_ASSISTANT_PROMPT):
|
| 193 |
-
response = response[len(SKIN_ASSISTANT_PROMPT):].strip()
|
| 194 |
-
return response
|
| 195 |
|
| 196 |
# Gradio interface
|
| 197 |
iface = gr.Interface(
|
|
@@ -205,3 +217,4 @@ iface = gr.Interface(
|
|
| 205 |
if __name__ == "__main__":
|
| 206 |
iface.launch()
|
| 207 |
|
|
|
|
|
|
# # gr.load("models/Bhaskar2611/Capstone").launch()

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr

# Hugging Face token (needed for gated/private models or higher API limits).
# May be None for public, non-gated models.
hf_token = os.environ.get("HF_TOKEN")

# Model ID for Mistral 7B Instruct
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

# BitsAndBytesConfig for 4-bit quantization to reduce memory usage.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

# bitsandbytes 4-bit quantization requires a CUDA device and raises on
# CPU-only hosts (e.g. a free Space), so only apply it when a GPU is
# actually available; otherwise fall back to a plain full-precision load.
if torch.cuda.is_available():
    # Load model with quantization and device mapping.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map="auto",
        token=hf_token,
    )
else:
    # CPU fallback: no quantization, default device placement.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_token,
    )
|
| 186 |
+
|
| 187 |
+
# Skin assistant system prompt
|
| 188 |
SKIN_ASSISTANT_PROMPT = (
|
| 189 |
"You are a helpful assistant specialized in skin diseases and dermatology. "
|
| 190 |
"Provide accurate, concise, and helpful advice about skin conditions, symptoms, "
|
|
|
|
| 192 |
)
|
| 193 |
|
| 194 |
def generate_response(user_input):
    """Generate a dermatology-assistant reply for ``user_input``.

    Prepends ``SKIN_ASSISTANT_PROMPT`` to the user's text, samples a
    continuation from the model, and returns only the newly generated
    text (the prompt tokens are sliced off before decoding).
    """
    # Combine the assistant system prompt + user input.
    prompt = SKIN_ASSISTANT_PROMPT + user_input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.1,
    )
    # Decode only the tokens generated AFTER the prompt. Slicing by token
    # count is robust even when decoding does not reproduce the prompt
    # byte-for-byte, in which case a textual .replace() of the prompt
    # would silently fail (or delete the text anywhere it re-appears).
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
# Gradio interface
|
| 209 |
iface = gr.Interface(
|
|
|
|
| 217 |
# Standard entry point: start the Gradio server only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
|
| 218 |
iface.launch()
|
| 219 |
|
| 220 |
+
|