import os

# Must be set before transformers/torch are imported: cap the BLAS/OpenMP
# worker threads and silence the HuggingFace tokenizers fork-safety warning.
os.environ.update({
    "OMP_NUM_THREADS": "2",
    "MKL_NUM_THREADS": "2",
    "TOKENIZERS_PARALLELISM": "false",
})
| |
|
| | from fastapi import FastAPI, HTTPException |
| | from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
| | import gradio as gr |
| |
|
| | |
# Download and instantiate the chat model once at module import; the single
# `pipe` object is shared by the REST endpoint and the Gradio callback below.
model_id = "rasyosef/Phi-1_5-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
| |
|
# ASGI application object; the Gradio UI is mounted onto it at the bottom of the file.
app = FastAPI()
| |
|
| | @app.get("/chat") |
| | def chat(query: str): |
| | """ |
| | REST API endpoint. Use: GET /chat?query=Your question |
| | Returns a JSON {"response": "..."}. |
| | """ |
| | if not query: |
| | raise HTTPException(status_code=400, detail="Query parameter 'query' is required.") |
| | |
| | messages = [ |
| | {"role": "system", "content": "You are a helpful assistant."}, |
| | {"role": "user", "content": query} |
| | ] |
| | result = pipe( |
| | messages, |
| | max_new_tokens=100, |
| | do_sample=False, |
| | return_full_text=False |
| | ) |
| | answer = result[0]["generated_text"].strip() |
| | return {"response": answer} |
| |
|
| | |
def gradio_chat(input_text):
    """Gradio callback: generate one assistant reply for *input_text*.

    Returns an empty string for empty/falsy input instead of invoking the model.
    """
    if not input_text:
        return ""

    chat_history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": input_text},
    ]
    # Same deterministic generation settings as the REST endpoint.
    outputs = pipe(
        chat_history,
        max_new_tokens=100,
        do_sample=False,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
| |
|
# Browser-facing Gradio UI wrapping the same generation callback.
iface = gr.Interface(
    fn=gradio_chat,
    title="Phi-1.5 Chatbot",
    description="Enter a message and press **Submit** to get a response.",
    inputs=gr.Textbox(lines=2, placeholder="Type a message..."),
    outputs="text",
)

# Serve the Gradio UI at the root path of the FastAPI app;
# mount_gradio_app returns the combined ASGI application.
app = gr.mount_gradio_app(app, iface, path="/")