Trigger82 committed on
Commit
08aad81
·
verified ·
1 Parent(s): e44d7d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -28
app.py CHANGED
@@ -1,37 +1,37 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
4
  import urllib.parse
5
 
6
- # Load model and tokenizer
7
  model_id = "microsoft/phi-2"
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(model_id)
10
 
11
- # Global memory for all users
12
  chat_history = {}
13
 
14
- # Format past messages
15
  def format_context(history):
16
- context = ""
17
- for user, bot in history[-3:]: # Last 3 exchanges
18
- context += f"You: {user}\n𝕴 𝖆𝖒 π–π–Žπ–’: {bot}\n"
19
- return context
20
-
21
- # Main chat function with memory per user
22
- def chat_with_memory(query_string):
23
- parsed = urllib.parse.parse_qs(query_string)
24
- user_input = parsed.get("query", [""])[0]
25
- user_id = parsed.get("user_id", ["default"])[0]
26
-
27
- # Get or init user history
28
- history = chat_history.get(user_id, [])
29
 
30
- # Format prompt
31
- context = format_context(history) + f"You: {user_input}\n𝕴 𝖆𝖒 π–π–Žπ–’:"
 
 
 
 
 
 
 
32
 
33
- # Tokenize & generate
34
- inputs = tokenizer(context, return_tensors="pt", return_attention_mask=True)
35
  outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
36
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True).split("𝕴 𝖆𝖒 π–π–Žπ–’:")[-1].strip()
37
 
@@ -39,13 +39,10 @@ def chat_with_memory(query_string):
39
  history.append((user_input, reply))
40
  chat_history[user_id] = history[-10:]
41
 
42
- return {"reply": reply}
43
 
44
- # Create public /ai?query=&user_id=
45
- iface = gr.Interface(
46
- fn=chat_with_memory,
47
- inputs="text", # URL query string
48
- outputs="json"
49
- )
50
 
51
- iface.launch()
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from fastapi import FastAPI, Request
5
+ from fastapi.responses import JSONResponse
6
  import urllib.parse
7
 
8
+ # Load model
9
  model_id = "microsoft/phi-2"
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
  model = AutoModelForCausalLM.from_pretrained(model_id)
12
 
13
+ # Memory for users
14
  chat_history = {}
15
 
16
+ # Format history
17
  def format_context(history):
18
+ return "".join([f"You: {u}\n𝕴 𝖆𝖒 π–π–Žπ–’: {b}\n" for u, b in history[-3:]])
19
+
20
+ # FastAPI app
21
+ app = FastAPI()
 
 
 
 
 
 
 
 
 
22
 
23
+ @app.get("/ai")
24
+ async def ai_chat(request: Request):
25
+ query_params = dict(request.query_params)
26
+ user_input = query_params.get("query", "")
27
+ user_id = query_params.get("user_id", "default")
28
+
29
+ # Get user history
30
+ history = chat_history.get(user_id, [])
31
+ prompt = format_context(history) + f"You: {user_input}\n𝕴 𝖆𝖒 π–π–Žπ–’:"
32
 
33
+ # Tokenize & run model
34
+ inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
35
  outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
36
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True).split("𝕴 𝖆𝖒 π–π–Žπ–’:")[-1].strip()
37
 
 
39
  history.append((user_input, reply))
40
  chat_history[user_id] = history[-10:]
41
 
42
+ return JSONResponse({"reply": reply})
43
 
44
+ # Wrap with Gradio to serve
45
+ app = gr.mount_gradio_app(app, gr.Interface(lambda x: x, "textbox", "textbox"))
 
 
 
 
46
 
47
+ # Launch it
48
+ gradio_app = gr.FastAPI(app)