prakhardoneria committed on
Commit
fb0e8e6
·
verified ·
1 Parent(s): b0ede7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -15
app.py CHANGED
@@ -1,21 +1,36 @@
 
1
  import gradio as gr
2
- from transformers import pipeline
 
3
 
4
- # Load lightweight Flan-T5 model
5
- qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
 
6
 
7
- def answer_question(question):
8
- prompt = f"Answer the following programming question clearly:\n{question}"
9
- result = qa_pipeline(prompt, max_length=200, do_sample=True, top_p=0.9, temperature=0.7)[0]["generated_text"]
10
- return result.strip()
11
 
12
- demo = gr.Interface(
13
- fn=answer_question,
14
- inputs=gr.Textbox(lines=2, label="Ask a programming question"),
15
- outputs=gr.Textbox(label="Answer"),
16
- title="Lightweight Code Q&A (Flan-T5)",
17
- description="Ask coding questions and get short, helpful answers using the Flan-T5 model."
18
  )
19
 
20
- if __name__ == "__main__":
21
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os  # NOTE(review): imported but unused in this file — confirm before removing
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Quantized TinyLlama chat model in GGUF format, suitable for CPU-only inference.
REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf"

# Download model weights into a local cache directory.
# hf_hub_download returns the cached path without re-downloading if the
# file is already present under ./models.
model_path = hf_hub_download(repo_id=REPO, filename=FILENAME, cache_dir="./models")

# Load the model with llama-cpp.
# n_ctx: context window size in tokens.
# n_threads: number of CPU threads used for inference.
# use_mlock: lock model pages in RAM to avoid swapping.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    use_mlock=True
)
20
 
21
def format_prompt(message, history):
    """Build a role-tagged chat prompt from the conversation history.

    Each (user, assistant) pair in *history* is rendered as a completed
    turn using the <|user|> / <|assistant|> tags, and *message* is
    appended as an open assistant turn for the model to complete.
    Leading/trailing whitespace on every utterance is stripped.
    """
    turns = [
        f"<|user|>\n{question.strip()}\n<|assistant|>\n{answer.strip()}\n"
        for question, answer in history
    ]
    turns.append(f"<|user|>\n{message.strip()}\n<|assistant|>\n")
    return "".join(turns)
28
+
29
def chat(message, history):
    """Generate one assistant reply for the Gradio chat callback.

    Renders the running conversation plus the new *message* into a
    prompt, runs a single llama-cpp completion (stopping at the next
    role tag), and returns the stripped generated text.
    """
    completion = llm(
        format_prompt(message, history),
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        stop=["<|user|>", "<|assistant|>"],
    )
    return completion["choices"][0]["text"].strip()
34
+
35
# Build and launch the Gradio chat UI.
demo = gr.ChatInterface(
    chat,
    title="TinyLlama CPU Chat",
    description="Lightweight local LLM (1.1B) powered by llama.cpp.",
)
demo.launch()