lingadevaruhp committed
Commit 7d955d5 (verified) · Parent: 8625cb1

Update app.py

Files changed (1): app.py (+14 −39)
app.py CHANGED
@@ -1,42 +1,37 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 import torch
 import gradio as gr
 import json
 import os
 
-# Load tokenizer and model - using thoshan_Flash model
-model_name = "microsoft/Phi-3-mini-4k-instruct"  # Will be replaced with actual thoshan_Flash model when available
+# --- Change only these two lines if you update your base or adapter! ---
+base_model_name = "unsloth/gemma-2-9b-it-bnb-4bit"
+lora_adapter_path = "lingadevaruhp/thoshan_Flash"
+# ----------------------------------------------------------------------
 
 try:
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-    # Load base model directly (no LoRA adapters)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model_name,
         torch_dtype=torch.bfloat16,
         device_map="auto",
         low_cpu_mem_usage=True,
         trust_remote_code=True,
-        attn_implementation="eager",  # Fix for compatibility issues
-        cache_dir=None  # Disable cache to avoid compatibility issues
+        attn_implementation="eager"
     )
+    model = PeftModel.from_pretrained(base_model, lora_adapter_path)
 except Exception as e:
     print(f"Error loading model: {e}")
     tokenizer = None
     model = None
 
-# Load dataset for context
 def load_dataset():
-    # Try multiple possible dataset files
     dataset_files = ["2000-data-set.txt", "flirt_dataset.jsonl"]
-
     for dataset_file in dataset_files:
         if os.path.exists(dataset_file):
             print(f"Found dataset file: {dataset_file}")
-
-            # Handle different file formats
             if dataset_file.endswith('.jsonl'):
-                # Handle JSONL format
                 dataset_entries = []
                 try:
                     with open(dataset_file, 'r', encoding='utf-8') as f:
@@ -51,17 +46,12 @@ def load_dataset():
                     print(f"Error reading JSONL file {dataset_file}: {e}")
                     continue
             else:
-                # Handle plain text format - create sample entries
                 try:
                     with open(dataset_file, 'r', encoding='utf-8') as f:
                         content = f.read().strip()
-
-                    # Skip if content looks like HTML (as in the current file)
                     if content.startswith('<!DOCTYPE html>') or '<html>' in content:
                         print(f"Skipping HTML file: {dataset_file}")
                         continue
-
-                    # Create sample conversation entries from text
                     sample_entries = [
                         {"input": "Hello", "output": "Hi there! How are you doing today?"},
                         {"input": "How are you?", "output": "I'm doing great! Thanks for asking. What can I help you with?"},
@@ -71,30 +61,23 @@ def load_dataset():
                 except Exception as e:
                     print(f"Error reading text file {dataset_file}: {e}")
                     continue
-
     print("No valid dataset file found, using default responses")
-    # Return default entries if no file found
     return [
         {"input": "Hello", "output": "Hi there! How are you doing today?"},
         {"input": "How are you?", "output": "I'm doing great! Thanks for asking. What can I help you with?"},
         {"input": "Tell me about yourself", "output": "I'm thoshan_Flash, an AI assistant created to help and chat with you. I'm friendly and always happy to help!"}
     ]
 
-# Load the dataset content
 dataset_content = load_dataset()
 print(f"Loaded {len(dataset_content)} dataset entries")
 
 def generate_response(prompt, max_new_tokens=100):
-    # Check if model is available
     if model is None or tokenizer is None:
         return "Error: Model failed to load. Please check the logs and try restarting the space."
-
     try:
-        # Add dataset context to the prompt for better responses
         context = ""
         if dataset_content:
-            # Use first few entries as context
-            context_entries = dataset_content[:3]  # Use first 3 entries
+            context_entries = dataset_content[:3]
             context_text = ""
             for entry in context_entries:
                 if 'input' in entry and 'output' in entry:
@@ -102,12 +85,8 @@ def generate_response(prompt, max_new_tokens=100):
                 elif 'text' in entry:
                     context_text += f"{entry['text']}\n\n"
             context = f"Dataset context:\n{context_text}\n" if context_text else ""
-
-        # Format the prompt for thoshan_Flash
         formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
-
-        inputs = tokenizer(formatted_prompt, return_tensors="pt")
-
+        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -116,17 +95,13 @@ def generate_response(prompt, max_new_tokens=100):
                 temperature=0.7,
                 top_p=0.9,
                 pad_token_id=tokenizer.eos_token_id,
-                use_cache=False  # Disable caching to avoid compatibility issues
+                use_cache=False
             )
-
-        # Decode only the generated part (excluding the input)
        generated_text = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
         return generated_text.strip()
-
     except Exception as e:
         return f"Error generating response: {str(e)}"
 
-# Gradio interface
 iface = gr.Interface(
     fn=generate_response,
     inputs=[
@@ -139,4 +114,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
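
For quick verification outside the Space, here is a minimal sketch of the loading path this commit introduces: the Gemma-2 base checkpoint plus the thoshan_Flash LoRA adapter attached through peft. The repo IDs come from the diff above; everything else is assumed, in particular a CUDA GPU and an installed bitsandbytes, since the unsloth bnb-4bit checkpoint carries its own 4-bit quantization config.

# smoke_test.py - check that the adapter attaches and the model generates.
# Assumes: transformers, peft, accelerate, bitsandbytes installed; CUDA GPU available.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

base_model_name = "unsloth/gemma-2-9b-it-bnb-4bit"   # from the diff above
lora_adapter_path = "lingadevaruhp/thoshan_Flash"    # from the diff above

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,  # dtype for the non-quantized modules only
    device_map="auto",
)
# Wrap the frozen base with the trained LoRA weights.
model = PeftModel.from_pretrained(base_model, lora_adapter_path)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

If adapter hot-swapping is not needed, model.merge_and_unload() can fold the LoRA weights into the base for slightly faster inference, though merging into a 4-bit-quantized base is lossy and not supported by every peft version, so benchmark before relying on it.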
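One thing the commit does not touch: generate_response still hard-codes Phi-3-style turn markers (<|user|>, <|end|>, <|assistant|>), while Gemma-2 instruct checkpoints are trained on a different turn format (<start_of_turn>/<end_of_turn>). A possible follow-up, assuming the tokenizer ships Gemma's chat template, is to let apply_chat_template build the prompt instead:

# Inside generate_response: let the tokenizer's own chat template produce the
# turn markers rather than hard-coding Phi-3's <|user|>/<|end|> tokens.
messages = [{"role": "user", "content": f"{context}{prompt}"}]
formatted_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a plain string
    add_generation_prompt=True,  # append the model-turn opener
)
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

This keeps the rest of the generation call unchanged and avoids feeding Gemma special tokens it was never trained on.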