# app.py
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

# --- 1. Load the fine-tuned model and tokenizer ---
# This will download the model from your Hugging Face Hub repository.
# Using bfloat16 for better performance on modern GPUs/CPUs.
try:
    model_id = "devanshdhir/qwen3-0.6B-flask-expert"  # Your model ID
    pipe = pipeline(
        "text-generation",
        model=model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model_loaded = True
except Exception as e:
    model_loaded = False
    model_error = str(e)
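# (Optional) Quick local sanity check: a sketch, not part of the Space itself.
# The example prompt below is purely illustrative. Uncomment to verify the
# pipeline loads and generates before deploying.
# if model_loaded:
#     check = pipe("### Instruction:\nWhat is Flask?\n\n### Response:\n", max_new_tokens=32)
#     print(check[0]["generated_text"])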
# --- 2. Define the prediction function ---
# This function takes a user's question, formats it, and returns the model's answer.
def get_flask_answer(question):
    if not model_loaded:
        return f"Error loading the model: {model_error}"

    # Use the same Alpaca prompt format you used for training
    prompt = f"""### Instruction:
{question}

### Response:
"""
    # Generate the response.
    # A low temperature gives more factual, less "creative" answers.
    # Using the EOS token as both the stopping criterion and the pad token
    # prevents over-generation and silences pad-token warnings.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
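    # Note: passing return_full_text=False in the call above would make the
    # pipeline return only the completion, so the manual "### Response:" split
    # below would be unnecessary; the explicit split is kept as written.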
    # Extract only the answer part from the generated text
    full_response = outputs[0]["generated_text"]
    try:
        answer = full_response.split("### Response:")[1].strip()
        return answer
    except IndexError:
        return "The model did not generate a response in the expected format. Please try again."
# --- 3. Create the Gradio Interface ---
# This creates the web UI with input/output components using a theme.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # Flask Expert Q&A Bot 🤖
        This is a demo of the Qwen3-0.6B model, fine-tuned on the Flask web framework's documentation and source code.
        Ask it a technical question about Flask!
        """
    )
    with gr.Row():
        inp = gr.Textbox(
            lines=5,
            label="Your Question about Flask",
            placeholder="e.g., How does Flask's `g` object facilitate the sharing of request-specific data?"
        )
        out = gr.Markdown(label="Answer from Flask-Expert")
    btn = gr.Button("Submit")
    btn.click(fn=get_flask_answer, inputs=inp, outputs=out)
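    # Optionally, also wire the textbox's submit event so keyboard submission
    # triggers the same function (identical behavior to the button):
    # inp.submit(fn=get_flask_answer, inputs=inp, outputs=out)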
    gr.Examples(
        examples=[
            ["What happens if I accidentally define a route with the exact same URL in two different blueprints?"],
            ["How do you handle file uploads securely in Flask?"],
            ["Can you tell me the recipe for butter chicken?"]
        ],
        inputs=inp
    )
# --- 4. Launch the Interface ---
if __name__ == "__main__":
    iface.launch()
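# A minimal requirements.txt sketch for this Space (assumed, not taken from the
# source; pin versions as needed):
#   gradio
#   transformers
#   torch
#   accelerate  # required by device_map="auto"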