hashirehtisham commited on
Commit
fb43022
·
verified ·
1 Parent(s): 738149b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from huggingface_hub import InferenceClient
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+
7
# --- One-time setup: load the knowledge base and build the FAISS index ---

# Load the raw knowledge-base text shipped alongside the app.
with open("verilog.txt", "r", encoding="utf-8") as f:
    data = f.read()

# Simple paragraph chunking on blank lines.  Strip each chunk and drop
# empty ones: consecutive blank lines in the source file would otherwise
# yield empty strings that get embedded and can be retrieved as useless
# context.
chunks = [c.strip() for c in data.split("\n\n") if c.strip()]

embedder = SentenceTransformer('all-MiniLM-L6-v2')
# encode() returns a float32 numpy matrix, which is the dtype FAISS expects.
embeddings = embedder.encode(chunks)

# Exact (brute-force) L2 index over all chunk embeddings.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
21
+
22
def rag_retrieve(query, top_k=3):
    """Return the ``top_k`` chunks most relevant to *query*, newline-joined.

    Args:
        query: Free-text user question to embed and search with.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunk texts joined with newlines.  May contain fewer
        than ``top_k`` chunks when the corpus is small.
    """
    # Never ask FAISS for more neighbours than the index holds: it pads the
    # result with -1, and chunks[-1] would then silently duplicate the last
    # chunk instead of failing.
    top_k = min(top_k, len(chunks))
    query_emb = embedder.encode([query])
    distances, indices = index.search(query_emb, top_k)
    # Defensive: skip any -1 padding that could still appear (e.g. empty index).
    retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]
    return "\n".join(retrieved_chunks)
27
+
28
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a RAG-augmented chat completion for the Gradio ChatInterface.

    Args:
        message: Latest user message.
        history: Prior turns as OpenAI-style {"role": ..., "content": ...} dicts.
        system_message: Base system prompt taken from the UI textbox.
        max_tokens: Generation cap from the UI slider.
        temperature: Sampling temperature from the UI slider.
        top_p: Nucleus-sampling threshold from the UI slider.
        hf_token: OAuth token injected by gr.LoginButton.

    Yields:
        The assistant reply so far, re-yielded as each streamed token arrives.
    """
    # Pull supporting context from the local FAISS index and fold it into
    # the system prompt.
    retrieved_context = rag_retrieve(message)

    # FIX: the original prompt said "Relevant medical guidance below" — a
    # copy-paste leftover from another app; this knowledge base documents a
    # Verilog RISC-processor project, not medical content.
    rag_augmented_system = (
        f"{system_message}\n\n"
        "Relevant project documentation below:\n"
        f"{retrieved_context}\n\n"
        "Use this information while responding clearly and politely."
    )

    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    messages = [{"role": "system", "content": rag_augmented_system}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""

    # Stream deltas and yield the accumulated reply after each one.  The
    # loop variable is named `event` (the original reused `message`, which
    # shadowed the user-message parameter).
    for event in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = event.choices
        token = ""
        # Some stream events carry no content delta (e.g. role headers /
        # finish events); treat those as an empty token.
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content

        response += token
        yield response
69
+
70
+
71
# Chat UI wiring: `respond` streams the replies; `additional_inputs` surface
# the system prompt and the sampling parameters as user-tweakable controls.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        # Base system prompt; `respond` augments it with retrieved context.
        gr.Textbox(value="You are an AI based knowledge base of the ICT project of 16-bit RISC processor built in verilog by Hashir Ehtisham, Abdullah Ikram and Hadi Khan Lodhi.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
87
+
88
# Top-level layout: a sidebar holding the Hugging Face login button (which
# supplies the OAuth token that `respond` receives) next to the rendered
# chat interface.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()