Spaces:
Sleeping
Sleeping
File size: 3,709 Bytes
da321c2 59847fe da321c2 59847fe da321c2 59847fe ea6cef5 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe efb888a 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe da321c2 59847fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# app.py (LoRA-only loading)
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import model_info
# ===== Settings =====
# Use GPU 0 when CUDA is available, else CPU (-1 is the transformers
# pipeline convention for "no CUDA device").
device = 0 if torch.cuda.is_available() else -1
lora_repo = "rahul7star/GPT-Diffuser-v1"  # ONLY LoRA fine-tuned repo

# In-memory log buffer; rendered in the Gradio "Detailed Model Log" panel.
log_lines = []


def log(msg):
    """Print *msg* with a HH:MM:SS timestamp and append it to log_lines."""
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)


log(f"🚀 Loading LoRA-only model from {lora_repo}")
log(f"Device: {'GPU' if device == 0 else 'CPU'}")
# ====== Tokenizer ======
# Load the tokenizer shipped inside the LoRA repo. Failure is non-fatal:
# the UI still starts and the error is surfaced in the log panel.
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    # Causal-LM tokenizers often ship without a pad token; reuse EOS so
    # downstream generation/padding does not crash.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None
# ====== LoRA-only model ======
model = None
pipe = None
try:
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,
        # fp16 only makes sense on GPU; CPU inference stays in fp32.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    model.eval()  # inference mode: disables dropout etc.
    log("✅ LoRA-only model loaded successfully")
    # When the model was loaded with device_map="auto", accelerate already
    # placed it on devices and transformers raises if an explicit `device`
    # is also passed to pipeline() — only pin the device on the CPU path.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **({} if torch.cuda.is_available() else {"device": device}),
    )
    log("✅ Pipeline ready for inference")
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")
# ====== Chat Function ======
def chat_with_model(message, history):
    """Gradio handler: generate one assistant turn for *message*.

    Args:
        message: the user's new message (str).
        history: list of (user, assistant) tuples from the Chatbot widget;
            appended to in place.

    Returns:
        ("", updated_history, log_text) — empty string clears the textbox.
        Never raises; failures are reported through the log panel instead.
    """
    log_lines.clear()  # the log panel shows only the latest request
    log(f"📩 User message: {message}")
    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    # Rebuild the whole conversation as a plain-text prompt.
    context = (
        "The following is a conversation between a user and an AI "
        "assistant trained on Git source code.\n"
    )
    for user_turn, bot_turn in history:
        context += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    context += f"User: {message}\nAssistant:"
    log("🧠 Built conversation context")
    log(context)

    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f}s")
    except Exception as e:  # boundary handler: report instead of crashing the UI
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # The pipeline echoes the prompt, so keep only the newly generated
    # tail; then strip markup/noise and cut at the next speaker marker so
    # the model cannot continue the conversation on the user's behalf.
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

    log(f"💬 Model reply: {reply}")
    history.append((message, reply))
    return "", history, "\n".join(log_lines)
# ===== Gradio =====
# Two-column layout: chat on the left, per-request debug log on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # NOTE(review): the prompt in chat_with_model describes a Git-source-code
    # assistant, while the UI advertises a Bhagavad Gita assistant — confirm
    # which domain is intended.
    gr.Markdown("## 💬 Qwen LoRA-only — Bhagavad Gita Assistant")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # The callback must return exactly one value per output component; the
    # original returned a 3-tuple for two outputs, which Gradio rejects at
    # runtime when the button is clicked.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|