Minte committed on
Commit
c3e5dc7
·
1 Parent(s): 07e06da

lets try now

Browse files
Files changed (1) hide show
  1. app.py +131 -64
app.py CHANGED
@@ -1,86 +1,153 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
 
5
- # Initialize model and tokenizer
6
- model = None
7
- tokenizer = None
8
-
9
- print("🚀 Initializing DialoGPT-medium model...")
10
-
11
- try:
12
- print("📥 Loading DialoGPT-medium model...")
13
- model_name = "microsoft/DialoGPT-medium"
14
- tokenizer = AutoTokenizer.from_pretrained(model_name)
15
- model = AutoModelForCausalLM.from_pretrained(model_name)
16
- print("DialoGPT-medium model loaded successfully!")
17
-
18
- # Add padding token if it doesn't exist
19
- if tokenizer.pad_token is None:
20
- tokenizer.pad_token = tokenizer.eos_token
21
-
22
- except Exception as e:
23
- print(f" Failed to load DialoGPT-medium model: {e}")
24
- model = None
25
- tokenizer = None
26
-
27
- def respond(message, chat_history):
28
- """Respond to user message using DialoGPT"""
29
- if model is None or tokenizer is None:
30
- return "Model not loaded. Please try again later."
31
-
32
- # Build conversation history
33
- conversation = ""
34
- for turn in chat_history:
35
- conversation += f"User: {turn[0]}\nBot: {turn[1]}\n"
36
-
37
- conversation += f"User: {message}\nBot:"
38
-
39
- # Encode and generate
40
- inputs = tokenizer.encode(conversation, return_tensors='pt', max_length=1024, truncation=True)
41
-
42
  with torch.no_grad():
43
  outputs = model.generate(
44
  inputs,
45
- max_length=len(inputs[0]) + 128,
46
  pad_token_id=tokenizer.eos_token_id,
47
  do_sample=True,
48
  temperature=0.7,
49
  top_k=50,
50
  top_p=0.95,
51
- repetition_penalty=1.2
52
  )
53
-
54
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
55
  response = response.split("Bot:")[-1].strip()
56
-
57
- # Clean response
58
  if "\nUser:" in response:
59
  response = response.split("\nUser:")[0]
60
-
61
- chat_history.append((message, response))
62
- return "", chat_history
63
-
64
- # Create the chat interface
65
- demo = gr.ChatInterface(
66
- fn=respond,
67
- title="💬 GihonTech AI Conversation Assistant",
68
- description="Chat with an AI powered by Microsoft's DialoGPT-medium model",
69
- examples=[
70
- "Hello! How are you today?",
71
- "What can you help me with?",
72
- "Tell me about artificial intelligence",
73
- "What's your favorite programming language?",
74
- ],
75
- cache_examples=False,
76
- retry_btn=None,
77
- undo_btn="↩️ Undo",
78
- clear_btn="🗑️ Clear"
79
- )
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  if __name__ == "__main__":
82
- demo.launch(
83
  server_name="0.0.0.0",
84
  server_port=7860,
85
- share=False
 
86
  )
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from fastapi import FastAPI, Request
5
 
6
+ # -------------------------------------------------
7
+ # 1. Load model (same as your old code)
8
+ # -------------------------------------------------
9
+ print("Initializing DialoGPT-medium model...")
10
+ model_name = "microsoft/DialoGPT-medium"
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ model = AutoModelForCausalLM.from_pretrained(model_name)
13
+
14
+ if tokenizer.pad_token is None:
15
+ tokenizer.pad_token = tokenizer.eos_token
16
+
17
+ print("DialoGPT-medium loaded!")
18
+
19
+ # -------------------------------------------------
20
+ # 2. Generation helper (your old logic, cleaned up)
21
+ # -------------------------------------------------
22
+ def generate_response(message: str, chat_history: list):
23
+ if not message.strip():
24
+ return "Please enter a message."
25
+
26
+ # Build conversation string
27
+ conv = ""
28
+ for user, bot in chat_history:
29
+ conv += f"User: {user}\nBot: {bot}\n"
30
+ conv += f"User: {message}\nBot:"
31
+
32
+ # Encode
33
+ inputs = tokenizer.encode(conv, return_tensors="pt", max_length=1024, truncation=True)
34
+
35
+ # Generate
 
 
 
 
 
 
 
36
  with torch.no_grad():
37
  outputs = model.generate(
38
  inputs,
39
+ max_length=inputs.shape[1] + 128,
40
  pad_token_id=tokenizer.eos_token_id,
41
  do_sample=True,
42
  temperature=0.7,
43
  top_k=50,
44
  top_p=0.95,
45
+ repetition_penalty=1.2,
46
  )
47
+
48
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
  response = response.split("Bot:")[-1].strip()
 
 
50
  if "\nUser:" in response:
51
  response = response.split("\nUser:")[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ return response
54
+
55
+ # -------------------------------------------------
56
+ # 3. Gradio chat function (used by /run/predict)
57
+ # -------------------------------------------------
58
+ def chat_fn(message: str, history: list):
59
+ response = generate_response(message, history or [])
60
+ history.append((message, response))
61
+ return "", history # clear textbox, update chat
62
+
# -------------------------------------------------
# 4. Build the UI (your Blocks layout)
# -------------------------------------------------
# Canned prompts rendered as one-click buttons in the sidebar.
example_questions = [
    "Hello! How are you today?",
    "What can you help me with?",
    "Tell me about artificial intelligence",
    "What's your favorite programming language?",
    "Can you explain machine learning?",
    "How does a neural network work?"
]

with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green"),
    title="GihonTech - AI Conversation Assistant"
) as demo:

    gr.Markdown("# GihonTech AI Conversation Assistant")
    gr.Markdown("Chat with an AI powered by **DialoGPT-medium**")

    with gr.Row():
        # Left column: the conversation itself (chat log + input row).
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Conversation", height=500)

            with gr.Row():
                msg = gr.Textbox(
                    label="Your Message",
                    placeholder="Type your message here...",
                    lines=2,
                    scale=4,
                )
                send = gr.Button("Send", variant="primary", scale=1)

            clear = gr.Button("Clear Chat", variant="secondary")

        # Right column: example prompts plus static model info.
        with gr.Column(scale=1):
            gr.Markdown("### Example Questions")
            for q in example_questions:
                # `x=q` binds the current question as a default argument,
                # sidestepping the classic late-binding-closure bug; the
                # click just copies the question into the textbox.
                gr.Button(q[:40] + ("..." if len(q) > 40 else ""), size="sm").click(
                    lambda x=q: x, outputs=msg
                )
            gr.Markdown("---")
            gr.Markdown("### Model Info")
            # Static status display only; not updated at runtime.
            gr.Textbox(
                value="DialoGPT-medium: Loaded",
                label="Model Status",
                interactive=False,
            )
            gr.Markdown(
                """
                **Features**
                - Context-aware replies
                - Conversation memory

                **Tips**
                - Ask clear questions
                - Use *Clear Chat* to start over
                """
            )

    # Event wiring: button click and Enter-submit share one handler;
    # clear resets both the chat log and the textbox.
    send.click(chat_fn, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(chat_fn, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear.click(lambda: ([], ""), outputs=[chatbot, msg])
# -------------------------------------------------
# 5. OPTIONAL: expose /lambda (same JSON format)
# -------------------------------------------------
fastapi_app = FastAPI()

@fastapi_app.post("/lambda")
async def lambda_endpoint(req: Request):
    """Stateless JSON endpoint mirroring Gradio's ``{"data": [...]}`` format."""
    payload = await req.json()
    # Gradio sends {"data": [...]} ; tolerate a missing or EMPTY list --
    # the original [0] index raised IndexError on {"data": []}.
    data = payload.get("data", [""])
    user_msg = data[0] if data else ""
    # Use the same generation logic (no history for this endpoint)
    resp = generate_response(user_msg, [])
    return {"data": [resp]}

# gr.Blocks has no `mount_app` method -- the original `demo.mount_app(...)`
# raised AttributeError at import time.  The supported API mounts the Gradio
# app INTO the FastAPI app so /lambda and the UI share one ASGI application.
# Serve it with `uvicorn app:app` to expose both (demo.launch() alone only
# serves the Gradio UI).
app = gr.mount_gradio_app(fastapi_app, demo, path="/")
# -------------------------------------------------
# 6. Launch with queue (critical for API!)
# -------------------------------------------------
if __name__ == "__main__":
    # queue() enables request queuing so concurrent UI/API calls are
    # serialized instead of hammering the single in-process model.
    # NOTE(review): demo.launch() serves only the Gradio app; the FastAPI
    # routes defined above need uvicorn run against the FastAPI app --
    # confirm which entry point the deployment actually uses.
    demo.queue().launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,       # Hugging Face Spaces' default port
        share=False,
        show_error=True,        # surface tracebacks in the UI for debugging
    )