Abdalkaderdev commited on
Commit
4aaef3d
·
1 Parent(s): e6f387f

Add Gradio interface for ZeroGPU testing

Browse files
Files changed (2) hide show
  1. app/gradio_interface.py +121 -0
  2. requirements.txt +8 -5
app/gradio_interface.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio interface for ORA - Fast GPU testing
3
+ This runs alongside the FastAPI server for quick testing with ZeroGPU
4
+ """
5
+
6
+ import gradio as gr
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
+ from peft import PeftModel
10
+ import os
11
+
12
# Probe for the Hugging Face `spaces` package: present only when running
# on a Space with ZeroGPU, where @spaces.GPU grants on-demand GPU access.
try:
    import spaces  # noqa: F401  (referenced later as @spaces.GPU)
except ImportError:
    USE_GPU = False
    print("ZeroGPU not available, running on CPU")
else:
    USE_GPU = True
19
+
20
# ---- Configuration -----------------------------------------------------
BASE_MODEL = "unsloth/Llama-3.2-1B-Instruct"
ADAPTER_PATH = "important/finetuning/models/ora_adapter"
# NOTE(review): on ZeroGPU the main process reports no CUDA at import time,
# so this resolves to "cpu" even when @spaces.GPU later grants a GPU —
# confirm the model actually reaches the GPU inside the decorated call.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ---- One-time model load (runs at import) ------------------------------
print(f"Loading ORA Model on {device}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# fp16 only makes sense on GPU; CPU inference stays in fp32.
model_dtype = torch.float16 if device == "cuda" else torch.float32
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=model_dtype,
    device_map=device,
    low_cpu_mem_usage=True,
)

# Attach the fine-tuned LoRA adapter when present; otherwise fall back to
# the plain base model so the demo still comes up.
if not os.path.exists(ADAPTER_PATH):
    print("Adapter not found, using base model.")
    model = base_model
else:
    print(f"Loading adapter from {ADAPTER_PATH}...")
    model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)

print("ORA Model Ready!")
43
+
44
# Single chat callback; wrapped with the ZeroGPU decorator only when the
# `spaces` package is available (decorator application == @spaces.GPU).
def generate_response(message, history):
    """Gradio chat callback: delegate to the shared implementation."""
    return _generate_response(message, history)

if USE_GPU:
    generate_response = spaces.GPU(generate_response)
52
+
53
def _generate_response(message, history):
    """Generate an ORA reply for one chat turn.

    Args:
        message: The latest user message (str).
        history: Prior turns from gr.ChatInterface. Supports both the
            legacy "tuples" format ([(user_msg, assistant_msg), ...]) and
            the Gradio 5 "messages" format
            ([{"role": ..., "content": ...}, ...]).

    Returns:
        The assistant's reply as a plain string (special tokens stripped).
    """
    # Build conversation history, starting from the fixed system persona.
    messages = [{
        "role": "system",
        "content": """You are ORA, a wise and compassionate spiritual guide.

Your role:
- Provide biblically-grounded wisdom
- Speak with warmth, empathy, and pastoral care
- Keep responses concise but meaningful (2-3 sentences)
- Always cite scripture when relevant

Respond with compassion and wisdom."""
    }]

    # Append prior turns, tolerating both history formats: under Gradio 5's
    # "messages" format each turn is already a role/content dict, and the
    # old tuple-unpacking would silently yield the KEY strings instead.
    for turn in history:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template and move to the load device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)

    # Stop on EOS or the Llama-3 end-of-turn token. Drop None entries in
    # case the tokenizer does not define one of them.
    terminators = [
        tok_id
        for tok_id in (
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        )
        if tok_id is not None
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=128,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Decode only the newly generated tokens (skip the echoed prompt).
    response_tokens = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response_tokens, skip_special_tokens=True)
102
+
103
# Create the Gradio chat UI.
# NOTE: retry_btn/undo_btn/clear_btn were removed from gr.ChatInterface in
# Gradio 5.x, and requirements.txt pins gradio>=5.9.1 — passing them would
# raise TypeError at startup, so they are omitted here.
demo = gr.ChatInterface(
    fn=generate_response,
    title="🙏 ORA - Spiritual AI Assistant (GPU Testing)",
    description="Fast GPU-powered testing interface for ORA. Ask theological questions, seek spiritual guidance, or discuss scripture.",
    examples=[
        "What does the Bible say about finding peace in difficult times?",
        "How can I grow closer to God?",
        "Explain the parable of the prodigal son",
        "What is the meaning of faith?",
    ],
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    # Bind on all interfaces so the Space/container can expose port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
requirements.txt CHANGED
@@ -1,8 +1,11 @@
 
 
 
 
 
 
 
 
1
  fastapi
2
  uvicorn
3
  pydantic
4
- pydantic-settings
5
- openai
6
- python-dotenv
7
- pytest
8
- pytest-asyncio
 
1
+ gradio>=5.9.1
2
+ torch>=2.0.0
3
+ transformers>=4.40.0
4
+ peft>=0.10.0
5
+ accelerate>=0.27.0
6
+ huggingface-hub>=0.20.0
7
+ spaces
8
+ lancedb
9
  fastapi
10
  uvicorn
11
  pydantic