Gajendra5490 committed · verified
Commit b8fe5bf · 1 Parent(s): 38304ad

Update app.py

Files changed (1): app.py (+47 -189)
app.py CHANGED
@@ -1,192 +1,50 @@
- import torch
- import json
- import logging
  import gradio as gr
- from pathlib import Path
- from huggingface_hub import hf_hub_download
-
- # Configuration constants
- MODEL_ID = "Gajendra5490/Scrached_Trained_Model"
- CURRENT_USER = "gajendra82"
- CURRENT_UTC = "2025-05-06 15:05:18"
-
- def setup_logging():
-     logging.basicConfig(
-         level=logging.INFO,
-         format='%(asctime)s - %(levelname)s - %(message)s',
-         handlers=[
-             logging.FileHandler('inference.log'),
-             logging.StreamHandler()
-         ]
-     )
-     return logging.getLogger(__name__)
-
- class ModelInference:
-     def __init__(self, model_id):
-         self.logger = logging.getLogger(__name__)
-         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-         self.model_id = model_id
-         self.load_model()
-
-     def load_model(self):
-         try:
-             # Download model and tokenizer from Hugging Face
-             self.logger.info(f"Downloading model from {self.model_id}")
-
-             model_path = hf_hub_download(
-                 repo_id=self.model_id,
-                 filename="model.pt"
-             )
-
-             tokenizer_path = hf_hub_download(
-                 repo_id=self.model_id,
-                 filename="tokenizer.json"
-             )
-
-             # Load model with weights_only=False
-             model_data = torch.load(
-                 model_path,
-                 map_location=self.device,
-                 weights_only=False
-             )
-
-             # Load tokenizer
-             with open(tokenizer_path, 'r', encoding='utf-8') as f:
-                 tokenizer_data = json.load(f)
-
-             # Initialize model
-             from model import ImprovedTransformer
-             model_config = model_data['model_config']
-
-             self.model = ImprovedTransformer(
-                 vocab_size=len(tokenizer_data['vocab']),
-                 d_model=model_config.get('d_model', 512),
-                 nhead=model_config.get('nhead', 8),
-                 num_encoder_layers=model_config.get('num_encoder_layers', 6),
-                 num_decoder_layers=model_config.get('num_decoder_layers', 6),
-                 dim_feedforward=model_config.get('dim_feedforward', 2048),
-                 dropout=model_config.get('dropout', 0.1),
-                 max_seq_length=model_config.get('max_seq_length', 128)
-             ).to(self.device)
-
-             # Load state dict
-             self.model.load_state_dict(model_data['model_state_dict'])
-             self.model.eval()
-
-             # Initialize tokenizer
-             from tokenizer import EnhancedTokenizer
-             self.tokenizer = EnhancedTokenizer(tokenizer_data['vocab'])
-
-             self.logger.info("Model loaded successfully")
-
-         except Exception as e:
-             self.logger.error(f"Error loading model: {e}")
-             raise
-
-     @torch.no_grad()
-     def generate_answer(self, input_text: str) -> str:
-         try:
-             # Tokenize input
-             input_ids = self.tokenizer.encode(f"<user> {input_text} <sep>")
-             input_tensor = torch.tensor([input_ids]).to(self.device)
-
-             # Initialize response with start token
-             response_ids = [self.tokenizer.special_tokens["<assistant>"]]
-             response_tensor = torch.tensor([response_ids]).to(self.device)
-
-             # Generate output
-             outputs = self.model(input_tensor, response_tensor)
-
-             # Get predicted tokens
-             predicted_ids = []
-             for _ in range(150):  # max length
-                 curr_output = self.model(input_tensor, torch.tensor([response_ids]).to(self.device))
-                 next_token = curr_output[0, -1].argmax().item()
-
-                 if next_token == self.tokenizer.special_tokens["<eos>"]:
-                     break
-
-                 response_ids.append(next_token)
-
-             # Decode output
-             answer = self.tokenizer.decode(response_ids)
-             answer = answer.replace("<assistant>", "").replace("<eos>", "").strip()
-
-             return answer
-
-         except Exception as e:
-             self.logger.error(f"Error generating answer: {e}")
-             return "Error generating answer"
-
- # Initialize model globally
- try:
-     print("Loading model from Hugging Face...")
-     model = ModelInference(MODEL_ID)
-     print("Model loaded successfully")
- except Exception as e:
-     print(f"Error loading model: {e}")
-     raise
-
- def process_input(input_text):
-     """Process input through Gradio"""
-     try:
-         # Log the input
-         logger = logging.getLogger(__name__)
-         logger.info(f"Input received: {input_text}")
-
-         # Generate answer
-         answer = model.generate_answer(input_text)
-
-         # Log the output
-         logger.info(f"Generated answer: {answer}")
-
-         return answer
-     except Exception as e:
-         logger.error(f"Error processing input: {e}")
-         return f"Error: {str(e)}"
-
- def create_gradio_interface():
-     """Create Gradio interface"""
-     iface = gr.Interface(
-         fn=process_input,
-         inputs=gr.Textbox(
-             label="Input",
-             placeholder="Enter your input here...",
-             lines=2
-         ),
-         outputs=gr.Textbox(
-             label="Answer",
-             lines=4
-         ),
-         title="Inference Interface",
-         description=f"""
-         Model: {MODEL_ID}
-         Current User: {CURRENT_USER}
-         Last Updated: {CURRENT_UTC} UTC
-         """,
-         theme=gr.themes.Soft(),
-         allow_flagging="never",
-         analytics_enabled=False
      )
-     return iface
-
- def main():
-     logger = setup_logging()
-     logger.info(f"Starting inference at {CURRENT_UTC}")
-     logger.info(f"User: {CURRENT_USER}")
-
-     try:
-         # Create and launch Gradio interface
-         iface = create_gradio_interface()
-         iface.launch(
-             server_name="0.0.0.0",
-             server_port=7860,
-             share=False
-         )
-
-     except Exception as e:
-         logger.error(f"Error in main: {e}")
-         print(f"Error: {str(e)}")

  if __name__ == "__main__":
-     main()

  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # 1. Load the fine-tuned model
+ model_path = "Gajendra5490/Scrached_Trained_Model"  # Hugging Face Hub repo ID of the fine-tuned model
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     device_map="auto",   # requires the accelerate package
+     torch_dtype="auto",
+     trust_remote_code=True
+ )
+
+ # 2. Create inference function
+ def chat_with_model(user_input):
+     # Alpaca-style instruction prompt
+     prompt = f"### Instruction:\n{user_input}\n\n### Response:\n"
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     output = model.generate(
+         **inputs,
+         max_new_tokens=300,
+         temperature=0.7,
+         top_p=0.95,
+         do_sample=True,
+         repetition_penalty=1.1
      )
+     response = tokenizer.decode(output[0], skip_special_tokens=True)
+     # Extract only the model's answer
+     generated_text = response.split("### Response:")[-1].strip()
+     return generated_text
+
+ # 3. Create Gradio Interface
+ interface = gr.Interface(
+     fn=chat_with_model,
+     inputs=gr.Textbox(
+         lines=2,
+         placeholder="Ask your skincare question...",
+         label="User Input"
+     ),
+     outputs=gr.Textbox(
+         label="Aesthetic AI's Reply"
+     ),
+     title="🧴 Aesthetic AI - Skincare Assistant",
+     description="Chat with Aesthetic AI for your skin concerns. Powered by fine-tuned Llama 3!",
+     theme="default"
+ )
+
+ # 4. Launch App
  if __name__ == "__main__":
+     interface.launch()
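
Note on dependencies: device_map="auto" in from_pretrained relies on the accelerate package, so the Space's requirements need it alongside transformers, torch, and gradio. The rewritten app can also be smoke-tested outside the Gradio UI by calling chat_with_model directly. A minimal sketch, assuming those packages are installed and the Hub repo is reachable; the file name and sample question are hypothetical, and importing app triggers the module-level model download and load:

# smoke_test.py — hypothetical local check, not part of this commit.
# Assumes transformers, torch, accelerate, and gradio are installed
# and Gajendra5490/Scrached_Trained_Model is accessible on the Hub.
from app import chat_with_model

print(chat_with_model("What routine helps with oily skin?"))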