Upload app.py
app.py CHANGED
@@ -1,70 +1,129 @@
+# app.py - Gilbert Multitask AI (LoRA)
 import gradio as gr
-"""
-"""
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+
+# Model config
+MODEL_NAME = "GilbertAkham/gilbert-qwen-multitask-lora"
+BASE_MODEL = "Qwen/Qwen1.5-1.8B-Chat"
+
+
+# ------------------------
+# MODEL LOADING
+# ------------------------
+class MultitaskInference:
+    def __init__(self):
+        self.model = None
+        self.tokenizer = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.load_model()
+
+    def load_model(self):
+        """Load base + LoRA model"""
+        print("Loading tokenizer...")
+        self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+
+        print("Loading base model...")
+        base_model = AutoModelForCausalLM.from_pretrained(
+            BASE_MODEL,
+            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+            device_map="auto" if self.device == "cuda" else None,
+            trust_remote_code=True,
+        )
+
+        print("Loading LoRA adapter...")
+        self.model = PeftModel.from_pretrained(base_model, MODEL_NAME)
+        self.model.to(self.device)
+        self.model.eval()
+        print("Model loaded successfully!")
+
+    def generate(self, task_type, text, max_tokens=512, temperature=0.7, top_p=0.9):
+        """Generate multitask response"""
+        task_prompts = {
+            "email": "Draft an email reply",
+            "story": "Continue the story",
+            "tech": "Answer the technical question",
+            "summary": "Summarize the content",
+            "chat": "Provide a helpful chat response"
+        }
+
+        prompt = f"### Task: {task_prompts[task_type]}\n\n### Input:\n{text}\n\n### Output:\n"
+        try:
+            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(self.device)
+
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_new_tokens=max_tokens,
+                    temperature=temperature,
+                    do_sample=True,
+                    top_p=top_p,
+                    repetition_penalty=1.1,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                )
+
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            if "### Output:" in response:
+                response = response.split("### Output:")[-1].strip()
+            return response
+
+        except Exception as e:
+            return f"Error generating response: {e}"
+
+
+# ------------------------
+# GRADIO INTERFACE
+# ------------------------
+engine = MultitaskInference()
+
+
+def chat_response(message, history, task_type, max_tokens, temperature, top_p):
+    """Chat handler for Gradio ChatInterface"""
+    try:
+        reply = engine.generate(
+            task_type=task_type,
+            text=message,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p
+        )
+        yield reply
+    except Exception as e:
+        yield f"Error: {e}"
+
+
+# ------------------------
+# BUILD CHAT APP
+# ------------------------
 chatbot = gr.ChatInterface(
+    fn=chat_response,
     type="messages",
     additional_inputs=[
-        gr.
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
+        gr.Dropdown(
+            choices=["chat", "email", "story", "tech", "summary"],
+            value="chat",
+            label="Task Type",
+            info="Select the text generation mode",
         ),
+        gr.Slider(minimum=64, maximum=1024, value=512, step=32, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
+    ],
+    title="Gilbert Multitask AI",
+    description=(
+        "**Base Model:** Qwen1.5-1.8B-Chat\n\n"
+        "LoRA fine-tuned for: Email drafting, story continuation, tech Q&A, summarization, and chat responses."
+    ),
+    theme=gr.themes.Soft(),
+    examples=[
+        ["Write a professional email update for a client about completing a milestone."],
+        ["Continue the story: The spaceship hummed as Captain Lira adjusted the controls..."],
+        ["How can I fix a 'ModuleNotFoundError' in Python?"],
+        ["Summarize: Artificial intelligence is transforming industries through automation and insights."],
+        ["Hey, I can't access the company VPN. What should I do?"],
     ],
 )
 
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
+    chatbot.launch(server_name="0.0.0.0", server_port=7860, share=True)
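Two follow-up sketches, both assumptions rather than part of this commit. First, the new imports (torch, transformers, peft) imply the Space must declare these packages alongside gradio; a plausible minimal requirements.txt would be:

gradio
torch
transformers
peft

Second, a quick local smoke test of the updated file. It assumes app.py is importable from the working directory; note that importing it runs the module-level engine = MultitaskInference(), so the base model and LoRA adapter are downloaded and loaded immediately.

# smoke_test.py - hypothetical local check, not part of this commit
from app import engine  # module import builds MultitaskInference and loads the model

# Exercise every task type that generate() supports
for task in ["chat", "email", "story", "tech", "summary"]:
    reply = engine.generate(task, "The quarterly report is ready for review.", max_tokens=64)
    print(f"[{task}] {reply[:100]}")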