Spaces:
Sleeping
Sleeping
File size: 3,318 Bytes
6f3fedf 8ba03d0 88644cf 6f3fedf 88644cf f7f5fd7 8ba03d0 c4d5905 a81100f 6f3fedf c4d5905 88644cf 8ba03d0 88644cf 8ba03d0 6f3fedf 8ba03d0 c4d5905 88644cf a81100f 6f3fedf c4d5905 6f3fedf c4d5905 8ba03d0 6f3fedf 88644cf 6f3fedf 88644cf 8ba03d0 88644cf 6f3fedf 8ba03d0 10bc714 6f3fedf 10bc714 8ba03d0 6f3fedf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
from huggingface_hub import InferenceClient
# --- Configuration: Model List ---
# Maps the human-readable labels shown in the UI dropdown to the
# Hugging Face model IDs passed to InferenceClient.chat_completion.
MODELS = {
    "Qwen 2.5 Coder 32B (Recommended)": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "Llama 3.1 8B (Best Logic)": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "DeepSeek Coder V2 Lite (Expert)": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    "Mistral Nemo 12B (Strong)": "mistralai/Mistral-Nemo-Instruct-2407",
    "GLM-4 / CodeGeeX4 9B": "THUDM/codegeex4-all-9b",
}
# Configuration for Memory: only the most recent MAX_HISTORY entries of the
# chat history are forwarded to the model, to keep the prompt small.
MAX_HISTORY = 10
def generate_abap(message, history, model_choice):
    """Stream an ABAP answer from the selected Hugging Face model.

    Generator used as the ``fn`` of a ``gr.ChatInterface``: it yields the
    growing partial response so Gradio can render it incrementally.

    Args:
        message: The user's current prompt (any object; coerced with str()).
        history: Prior chat turns. Supports both Gradio formats:
            list of ``[user, bot]`` pairs, or list of
            ``{"role": ..., "content": ...}`` message dicts.
        model_choice: Label from the UI dropdown; resolved via MODELS.

    Yields:
        str: The accumulated assistant reply so far, or an error message
        if the inference API call fails.
    """
    # 1. Resolve the UI label to a model ID; fall back to Qwen 32B.
    model_id = MODELS.get(model_choice, "Qwen/Qwen2.5-Coder-32B-Instruct")
    client = InferenceClient()
    system_prompt = "You are an expert SAP ABAP Developer. Write modern, efficient ABAP 7.4+ code. Always use inline declarations."
    messages = [{"role": "system", "content": system_prompt}]

    # 2. Add recent history only, to keep the prompt (and memory use) small.
    recent_history = history[-MAX_HISTORY:]
    for turn in recent_history:
        # CASE 1: list-of-pairs format: [[user, bot], ...]
        if isinstance(turn, (list, tuple)):
            if not turn:
                continue  # guard: an empty pair would raise IndexError
            messages.append({"role": "user", "content": str(turn[0])})
            if len(turn) > 1 and turn[1] is not None:
                messages.append({"role": "assistant", "content": str(turn[1])})
        # CASE 2: messages format: [{"role": ..., "content": ...}, ...]
        elif isinstance(turn, dict):
            # Forward ONLY role/content. Newer Gradio message dicts carry
            # extra keys (e.g. "metadata") that the chat-completion API
            # rejects, so appending the raw dict would break requests.
            role = turn.get("role")
            content = turn.get("content")
            if role and content is not None:
                messages.append({"role": role, "content": str(content)})

    # 3. Add the current user message.
    messages.append({"role": "user", "content": str(message)})

    try:
        # 4. Stream the response token-by-token.
        stream = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=2048,
            temperature=0.1,  # low temperature: deterministic-ish code output
            top_p=0.9,
            stream=True
        )
        partial_message = ""
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message
    except Exception as e:
        # Boundary handler: surface API failures to the chat UI rather
        # than crashing the Gradio worker.
        yield f"Error: The Free API is overloaded or model is too large. \nDetails: {str(e)}"
# --- The UI ---
# Builds the Gradio app: a model-selector dropdown feeding a streaming
# ChatInterface backed by generate_abap.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🚀 ABAP Coder Multi-Model")
    gr.Markdown("Select a model below. **Note:** Qwen 32B is large and may timeout on the free tier. If it fails, try Llama 3.1 8B.")
    # Dropdown of MODELS labels; passed to generate_abap as model_choice
    # via additional_inputs below.
    model_selector = gr.Dropdown(
        choices=list(MODELS.keys()),
        value="Qwen 2.5 Coder 32B (Recommended)",
        label="Select AI Model"
    )
    chat = gr.ChatInterface(
        fn=generate_abap,
        additional_inputs=[model_selector],
        # Each example is [message, model_choice] matching the fn signature.
        examples=[
            ["Write a report to select data from MARA using inline declarations.", "Qwen 2.5 Coder 32B (Recommended)"],
            ["Create a CDS View for Sales Orders (VBAK/VBAP).", "Llama 3.1 8B (Best Logic)"],
            ["Explain how to use READ TABLE with ASSIGNING FIELD-SYMBOL.", "DeepSeek Coder V2 Lite (Expert)"]
        ]
    )
# Launch the app only when run as a script (HF Spaces imports this module
# and launches it itself). Fix: removed a stray trailing "|" artifact that
# made the original line a syntax error.
if __name__ == "__main__":
    demo.launch()