Ynugget committed on
Commit
d6f4416
·
verified ·
1 Parent(s): 98e409e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -31
app.py CHANGED
@@ -1,42 +1,46 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
 
3
 
4
- # Load GGUF model (adjust path if needed)
5
- model_path = "Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-Q6_K_L.gguf"
6
- llm = Llama(
7
- model_path=model_path,
8
- n_ctx=2048, # Context window
9
- n_threads=4, # CPU threads
10
- n_gpu_layers=35 # Offload layers to GPU (set to 0 for CPU-only)
 
 
 
 
11
  )
12
 
13
- # Custom system prompt (enforced for all users)
14
- SYSTEM_PROMPT = """You are a specialized AI assistant. Follow these rules:
15
- - Only use data from 'https://example.com/official_data.json' to answer.
16
- - Respond in under 3 sentences.
17
- - If unsure, say "I can only answer from the provided source"."""
18
 
19
  def respond(message, history):
20
- # Format prompt with system instructions
21
- full_prompt = f"""<|system|>{SYSTEM_PROMPT}</s>
22
- <|user|>{message}</s>
23
- <|assistant|>"""
24
 
25
  # Generate response
26
- output = llm(
27
- full_prompt,
28
- max_tokens=150,
29
- temperature=0.7,
30
- stop=["</s>", "<|user|>"]
31
  )
32
 
33
- return output["choices"][0]["text"]
34
 
35
- # Gradio interface
36
- gr.ChatInterface(
37
- respond,
38
- title="Phi-3.5 Mini (GGUF)",
39
- description="Restricted Knowledge Assistant",
40
- examples=["What's our refund policy?", "List approved vendors"],
41
- theme="soft"
42
- ).launch()
 
 
1
import gradio as gr
from ctransformers import AutoModelForCausalLM
import os
import urllib.request

# GGUF weights file; fetched on first launch if not already on disk.
MODEL_PATH = "Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-Q6_K_L.gguf"
MODEL_URL = (
    "https://huggingface.co/ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF"
    f"/resolve/main/{MODEL_PATH}"
)

if not os.path.exists(MODEL_PATH):
    # Download with stdlib urllib instead of `os.system("wget ...")`:
    # portable (no external wget binary required) and raises on failure
    # instead of silently ignoring a non-zero exit status.
    urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)

# Load the GGUF model with ctransformers.
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    model_type="phi3",
    gpu_layers=50,        # layers offloaded to GPU (set to 0 for CPU-only)
    context_length=2048,  # token budget for prompt + generation
)
17
 
18
# System prompt prepended to every request by respond(); instructs the model
# to answer only from the referenced document and to keep replies short.
# NOTE: this is the literal text sent to the model — do not reword casually.
SYSTEM_PROMPT = """[SYSTEM] You are a compliance assistant. Follow these rules:
1. ONLY use data from '/data/company_policies.pdf' (provided in this Space's files)
2. If asked about unverified information, respond: "I can only reference approved documents"
3. Keep answers under 2 sentences."""
23
 
24
def respond(message, history):
    """Generate a reply for the Gradio ChatInterface.

    Args:
        message: The latest user message as a string.
        history: Prior turns supplied by gr.ChatInterface. Previously this
            was ignored, so the model forgot everything between turns; it
            is now replayed into the prompt.
            (assumes the classic list-of-(user, assistant)-tuples format —
            TODO confirm against the installed gradio version)

    Returns:
        The model's generated text (ctransformers returns a plain string).
    """
    # Replay earlier turns in the same [USER]/[ASSISTANT] framing used for
    # the live turn, so multi-turn context is preserved.
    turns = []
    for user_msg, bot_msg in history or []:
        turns.append(f"[USER]{user_msg}\n[ASSISTANT]{bot_msg}")
    past = ("\n".join(turns) + "\n") if turns else ""

    prompt = f"{SYSTEM_PROMPT}\n{past}[USER]{message}\n[ASSISTANT]"

    # Generate response
    response = llm(
        prompt,
        max_new_tokens=100,
        temperature=0.3,  # low for deterministic answers
        stop=["[USER]", "\n\n"],  # cut off before the model invents a new turn
    )
    return response
37
 
38
# Two-tab Gradio UI: a chat tab backed by respond(), plus an upload tab
# for supplying a reference document.
with gr.Blocks() as demo:
    gr.Markdown("## Phi-3.5 Mini - Restricted Knowledge Assistant")

    with gr.Tab("Chat"):
        # Chat tab is a standard ChatInterface wired to the model callback.
        chat = gr.ChatInterface(respond)

    with gr.Tab("Upload Source"):
        # NOTE(review): the uploaded file is not consumed anywhere in this
        # file — presumably intended as the knowledge base; verify wiring.
        gr.File(label="Upload PDF/JSON for reference", file_count="single")

demo.launch()