Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,66 +1,82 @@
|
|
| 1 |
|
| 2 |
import gradio as gr
|
| 3 |
import time
|
| 4 |
-
import
|
| 5 |
-
import
|
| 6 |
|
| 7 |
-
|
| 8 |
-
# Canned replies keyed by the exact user prompt; used by the scripted demo
# path (no model call). Values are returned verbatim to the chat UI.
DEMO_RESPONSES = {
    # Greeting
    "Hi there!": "Hi there! 👋 I'm the RML-AI demo, and I'd love to answer any questions you have about AI, technology, science, or anything else!",
    # Capabilities overview
    "What can you do?": "Great question! I'm a demo of RML-AI, which is pretty amazing. I can chat about AI, technology, science, business, health - you name it! The full system has access to 100GB of knowledge and can answer complex questions with sources. What interests you?",
    # General AI topic
    "Tell me about AI": "Oh, AI! That's a fascinating topic. AI and machine learning are changing the world, and RML-AI is actually a revolutionary new approach to AI. The full system would give you detailed, accurate answers with sources from its massive knowledge base. What specifically about AI interests you?",
    # Small talk
    "How are you?": "I'm doing great, thanks for asking! 😊 I'm excited to be chatting with you and showing off what RML-AI can do. What's on your mind today?",
    # Product pitch
    "What is RML?": "RML is what makes this system special! It's Resonant Memory Learning - a completely new way of doing AI that's 100x more efficient and 10x faster than traditional systems. It's exactly the kind of technology that's changing the future of AI!",
}
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def generate_response(prompt, max_new_tokens=128, temperature=0.2):
|
| 19 |
start = time.time()
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
# Greetings and casual conversation
|
| 29 |
-
if any(word in prompt_lower for word in ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']):
|
| 30 |
-
greetings = ["Hi there! 👋", "Hello! Nice to meet you!", "Hey! How can I help you today?", "Hi! I'm excited to chat with you!", "Hello! What would you like to know?"]
|
| 31 |
-
import random
|
| 32 |
-
reply = random.choice(greetings) + " I'm the RML-AI demo, and I'd love to answer any questions you have about AI, technology, science, or anything else!"
|
| 33 |
-
|
| 34 |
-
# How are you / personal questions
|
| 35 |
-
elif any(word in prompt_lower for word in ['how are you', 'how do you do', 'are you ok', 'feeling']):
|
| 36 |
-
reply = "I'm doing great, thanks for asking! 😊 I'm excited to be chatting with you and showing off what RML-AI can do. What's on your mind today?"
|
| 37 |
-
|
| 38 |
-
# What can you do / capabilities
|
| 39 |
-
elif any(word in prompt_lower for word in ['what can you do', 'help', 'capabilities', 'features']):
|
| 40 |
-
reply = "Great question! I'm a demo of RML-AI, which is pretty amazing. I can chat about AI, technology, science, business, health - you name it! The full system has access to 100GB of knowledge and can answer complex questions with sources. What interests you?"
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
reply = "Technology is incredible, isn't it? " + prompt + " is such an interesting area. The RML-AI system has tons of knowledge about the latest tech developments and can explain complex concepts in simple terms. What aspect of technology would you like to explore?"
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
reply = "RML is what makes this system special! It's Resonant Memory Learning - a completely new way of doing AI that's 100x more efficient and 10x faster than traditional systems. " + prompt + " is exactly the kind of question RML-AI excels at answering with its revolutionary frequency-based approach."
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
with gr.Blocks(title="RML-AI Demo") as demo:
|
| 66 |
gr.Markdown('''
|
|
|
|
| 1 |
|
| 2 |
import gradio as gr
|
| 3 |
import time
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 5 |
+
import torch
|
| 6 |
|
| 7 |
+
MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-rml-100k"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# Global model and tokenizer
|
| 10 |
+
_model = None
|
| 11 |
+
_tokenizer = None
|
| 12 |
+
|
| 13 |
+
def load_model():
    """Lazily load the RML tokenizer and model into module globals.

    Idempotent: subsequent calls are no-ops once ``_model`` is set.

    Returns:
        bool: True when the model is (already) loaded, False when loading
        failed (the error is printed, not raised, so the UI can degrade
        gracefully).
    """
    global _model, _tokenizer
    # Fast path: already loaded by an earlier call.
    if _model is not None:
        return True

    try:
        print("Loading RML model...")
        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        # Causal LMs often ship without a pad token; fall back to EOS so
        # batched/padded generation doesn't fail.
        if _tokenizer.pad_token is None:
            _tokenizer.pad_token = _tokenizer.eos_token

        use_cuda = torch.cuda.is_available()  # hoisted: checked once, used twice
        _model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            # fp16 on GPU for memory/speed; fp32 on CPU for correctness.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
            low_cpu_mem_usage=True,
        )
        print("Model loaded successfully!")
    except Exception as e:
        # BUG FIX: the original only reset on failure implicitly; make the
        # partial state explicit so a failed load can be retried cleanly
        # instead of leaving a tokenizer without a model.
        _model = None
        _tokenizer = None
        print(f"Error loading model: {e}")
        return False
    return True
|
| 34 |
|
| 35 |
def generate_response(prompt, max_new_tokens=128, temperature=0.2):
    """Generate a model reply for *prompt* and append a latency footer.

    Args:
        prompt: User text to continue.
        max_new_tokens: Cap on generated tokens (coerced to int).
        temperature: Sampling temperature; 0 disables sampling (greedy).

    Returns:
        str: The generated continuation plus an "(⏱️ N ms)" footer, or a
        human-readable error string — this function never raises.
    """
    start = time.time()

    if not load_model():
        return "Error: Could not load the RML model. Please try again."

    try:
        # Tokenize, truncating very long prompts to the model's usable window.
        inputs = _tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        # BUG FIX: with device_map="auto" the model weights live on GPU while
        # the freshly tokenized tensors are on CPU, so generate() raises a
        # device-mismatch error. Move every input tensor to the model's device.
        inputs = {k: v.to(_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = _model.generate(
                **inputs,
                max_new_tokens=int(max_new_tokens),
                do_sample=bool(temperature > 0),
                temperature=float(temperature),
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=_tokenizer.eos_token_id,
            )

        generated_text = _tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the continuation: strip the echoed prompt when present.
        if generated_text.startswith(prompt):
            response = generated_text[len(prompt):].strip()
        else:
            response = generated_text.strip()

        elapsed = int((time.time() - start) * 1000)
        return response + f"\n\n(⏱️ {elapsed} ms)"

    except Exception as e:
        # Surface the failure to the UI instead of crashing the Gradio worker.
        return f"Error generating response: {str(e)}"
|
| 71 |
+
|
| 72 |
+
# Sample questions for the demo
|
| 73 |
+
SAMPLES = [
|
| 74 |
+
"What is artificial intelligence?",
|
| 75 |
+
"Explain machine learning in simple terms",
|
| 76 |
+
"What is quantum computing?",
|
| 77 |
+
"How does RML work?",
|
| 78 |
+
"Tell me about neural networks"
|
| 79 |
+
]
|
| 80 |
|
| 81 |
with gr.Blocks(title="RML-AI Demo") as demo:
|
| 82 |
gr.Markdown('''
|