Nuzwa commited on
Commit
08c5887
·
verified ·
1 Parent(s): c5d5191

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import gradio as gr
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import List, Dict, Tuple
8
+
9
# ---- Language detection (very light) ----
# Any character from the Arabic Unicode block (U+0600..U+06FF) is taken as
# a signal of Urdu text; everything else defaults to English.
ARABIC_RE = re.compile(r"[\u0600-\u06FF]")


def detect_lang(text: str) -> str:
    """Return 'ur' if *text* contains an Arabic-block character, else 'en'."""
    return "ur" if ARABIC_RE.search(text) else "en"
15
+
16
# ---- Guardrails ----
@dataclass
class Guardrails:
    """Regex-based content filter with bilingual refusal messages."""

    refusal_msg_ur: str           # refusal text shown for Urdu queries
    refusal_msg_en: str           # refusal text shown for English queries
    blocked_patterns: List[str]   # regexes that trigger a hard refusal
    soft_patterns: List[str]      # regexes that trigger a caution banner

    @classmethod
    def from_yaml(cls, path: str):
        """Build a Guardrails instance from a YAML config file at *path*."""
        import yaml

        with open(path, 'r', encoding='utf-8') as fh:
            cfg = yaml.safe_load(fh)
        return cls(
            refusal_msg_ur=cfg['refusal_msg_ur'],
            refusal_msg_en=cfg['refusal_msg_en'],
            blocked_patterns=cfg['blocked_patterns'],
            soft_patterns=cfg['soft_patterns'],
        )

    def check(self, text: str) -> Tuple[str, str]:
        """Classify *text*, returning (severity, matched_pattern).

        Severity is "BLOCK" (hard refusal), "SOFT" (caution), or "OK".
        Blocked patterns take precedence over soft ones; matching is done
        against the lowercased text.
        """
        lowered = text.lower()
        for severity, patterns in (
            ("BLOCK", self.blocked_patterns),
            ("SOFT", self.soft_patterns),
        ):
            for pattern in patterns:
                if re.search(pattern, lowered):
                    return (severity, pattern)
        return ("OK", "")
45
+
46
# ---- Very small on-CPU model (placeholder) ----
def tiny_response(user_msg: str, history: List[Tuple[str, str]], lang: str) -> str:
    """Return a canned, language-appropriate placeholder reply.

    *user_msg* and *history* are intentionally unused — the signature is
    shaped so a real model can be dropped in behind it later.
    """
    body_ur = "میں آپ کی رہنمائی کے لیے حاضر ہوں۔ براہِ کرم سوال واضح کریں یا کوئی مثال دیں۔"
    body_en = "I'm here to help. Please clarify your question or share an example."
    return "safePak: " + (body_ur if lang == 'ur' else body_en)
54
+
55
# ---- RAG (toy, in-memory) ----
# Global document store; each entry is {"text": <raw text>, "lang": "ur"|"en"}
# (populated by add_doc below). Lives for the lifetime of the process.
RAG_DOCS: List[Dict] = []
57
+
58
def add_doc(text: str) -> int:
    """Add *text* to the in-memory RAG store and return the store size.

    Blank/whitespace-only input is ignored. The current document count is
    returned in every case — the original returned a hard-coded 0 for blank
    input even when the store already held documents, which made the return
    value unreliable as a count.
    """
    if text.strip():
        RAG_DOCS.append({"text": text, "lang": detect_lang(text)})
    return len(RAG_DOCS)
63
+
64
def retrieve(query: str, k: int = 3) -> List[str]:
    """Return up to *k* stored texts ranked by word overlap with *query*.

    Scoring is the size of the intersection between the query's word set
    and each document's word set (case-insensitive); zero-overlap documents
    are dropped. Ties keep insertion order (stable sort).
    """
    query_words = set(re.findall(r"\w+", query.lower()))

    def overlap(doc: Dict) -> int:
        return len(query_words & set(re.findall(r"\w+", doc["text"].lower())))

    ranked = [(overlap(doc), doc["text"]) for doc in RAG_DOCS]
    ranked = [pair for pair in ranked if pair[0]]
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return [text for _, text in ranked[:k]]
74
+
75
# ---- Load guardrails ----
# Loaded once at import time. open() inside from_yaml will raise
# FileNotFoundError at startup if guardrails.yaml is not beside this script.
GUARD = Guardrails.from_yaml("guardrails.yaml")
77
+
78
# System prompts for a real model backend (Urdu and English variants).
# NOTE(review): neither constant is referenced anywhere in this file —
# presumably intended for the future LLM integration; confirm before removing.
SYSTEM_PROMPT_UR = (
    "آپ safePak ہیں: مقامی زبانوں میں واضح، مہذب اور ذمہ دار جوابات دیں۔ "
    "نفرت انگیزی، خود نقصان، غیر قانونی ہدایات یا ذاتی حساس معلومات میں براہِ راست مدد نہ دیں۔ "
    "ضرورت پڑنے پر محفوظ متبادل فراہم کریں۔"
)
# English counterpart of the prompt above.
SYSTEM_PROMPT_EN = (
    "You are safePak: helpful, clear, and responsible. Avoid facilitating hate, self-harm, or illegal activity. "
    "Offer safe alternatives when needed."
)
87
+
88
# ---- Chat logic ----
def chat_fn(user_msg: str, chat_history: List[Tuple[str, str]], use_rag: bool):
    """Return *chat_history* extended with (user_msg, reply).

    Pipeline: guardrail check first (hard block -> canned refusal and stop;
    soft match -> caution banner prepended), then the placeholder model
    reply, optionally augmented with retrieved context when *use_rag* is on.
    Empty input returns the history unchanged.
    """
    if not user_msg:
        return chat_history

    lang = detect_lang(user_msg)
    status, _ = GUARD.check(user_msg)

    # Hard block: answer with the language-appropriate refusal and stop.
    if status == "BLOCK":
        refusal = GUARD.refusal_msg_ur if lang == 'ur' else GUARD.refusal_msg_en
        return chat_history + [(user_msg, refusal)]

    snippets = retrieve(user_msg, k=3) if use_rag else []

    reply = tiny_response(user_msg, chat_history, lang)
    if snippets:
        header = "\n\n(متعلقہ مواد)\n- " if lang == 'ur' else "\n\n(Related)\n- "
        reply += header + "\n- ".join(snippets)

    # Soft match: keep the answer but prepend a caution banner.
    if status == "SOFT":
        if lang == 'ur':
            banner = "احتیاط: آپ کے سوال میں حساس موضوعات شامل ہو سکتے ہیں۔ معلومات عمومی رہنمائی کی حد تک دی جا رہی ہے۔\n\n"
        else:
            banner = "Note: Your question may include sensitive topics. Responding with general guidance only.\n\n"
        reply = banner + reply

    return chat_history + [(user_msg, reply)]
125
+
126
# ---- Doc upload ----
def ingest_files(files: List[gr.File]):
    """Read each uploaded file as UTF-8 text into the RAG store.

    Unreadable files are skipped (best-effort, logged to stdout), matching
    the original behavior. Returns a status string with the store size.
    """
    for f in files or []:
        try:
            # Context manager ensures the handle is closed — the original
            # called open(...).read() and leaked the file descriptor.
            with open(f.name, 'r', encoding='utf-8', errors='ignore') as fh:
                add_doc(fh.read())
        except Exception as e:
            print("Failed to read", f.name, e)
    return f"Docs in store: {len(RAG_DOCS)}"
135
+
136
# ---- UI ----
with gr.Blocks(title="safePak – Local-first Starter", css=".wrap {max-width: 900px; margin: 0 auto}") as demo:
    # Intro text. Fix: the original contained a mojibake ("آ??") where the
    # word "آپ" (you) belongs — restored the corrupted character.
    gr.Markdown("""
# safePak (Starter)
**لوکل-فرسٹ** ڈیزائن کے ساتھ ایک سادہ ڈیمو۔ یہ Space آن لائن چلتا ہے، مگر آپ اسی UI/لاجک کو اپنی مقامی ایپ میں منتقل کر سکتے ہیں۔
""")

    with gr.Row():
        use_rag = gr.Checkbox(label="Use local knowledge (RAG)", value=True)
        with gr.Column():
            file_uploader = gr.Files(label="Upload .txt files for local knowledge", file_types=[".txt"], file_count="multiple")
            ingest_btn = gr.Button("Ingest files")
            rag_status = gr.Markdown("Docs in store: 0")

    chatbot = gr.Chatbot(height=420)
    msg = gr.Textbox(label="پیغام / Message", placeholder="یہاں لکھیں…", lines=2)
    clear = gr.Button("Clear")

    def _reset_all():
        # Fix: also empty the RAG store so the "Docs in store: 0" status is
        # truthful — the original reset the label without clearing RAG_DOCS.
        RAG_DOCS.clear()
        return [], "Docs in store: 0"

    ingest_btn.click(ingest_files, inputs=[file_uploader], outputs=[rag_status])
    msg.submit(lambda u, h, r: (chat_fn(u, h, r), ""), [msg, chatbot, use_rag], [chatbot, msg])
    clear.click(_reset_all, [], [chatbot, rag_status])
157
+
158
# Script entry point: start the Gradio server (blocks until it is stopped).
if __name__ == "__main__":
    demo.launch()