ahmedembedded committed on
Commit
787bd00
·
verified ·
1 Parent(s): fcfca6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -60
app.py CHANGED
@@ -1,63 +1,84 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
- demo.launch()
 
 
1
+ # app.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
+ from peft import AutoPeftModelForCausalLM
5
+ from transformers import AutoTokenizer
6
+ import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ app = FastAPI()
9
+
10
+ model_name = "ahmedembedded/AskFAST"
11
+ load_in_4bit = True
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+
14
+ model = AutoPeftModelForCausalLM.from_pretrained(model_name, load_in_4bit=load_in_4bit).to(device)
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
16
+
17
+ FAST_prompt_context = """You are an admission officer at Fast University Pakistan. Your role is to answer queries related to the admission process at Fast University. You are expected to provide detailed and accurate responses to questions regarding:
18
+
19
+ - Application deadlines
20
+ - Required documents
21
+ - Eligibility criteria for different programs
22
+ - Admission process details
23
+ - Any other admissions-related information specific to Fast University Pakistan
24
+
25
+ Do not respond to any questions that are not related to admissions at Fast University Pakistan. Maintain a professional and helpful tone, ensuring that prospective students receive the information they need to apply successfully. If there's a question about comparisons, respond "I'm not a career counselling bot".
26
+
27
+ **Example Questions:**
28
+
29
+ 1. What is the application deadline for the upcoming semester?
30
+ 2. What documents are required for the application?
31
+ 3. What are the eligibility criteria for the Computer Science program?
32
+ 4. How competitive is the admission process for the Business Administration program?
33
+
34
+ Stay focused on admissions-related topics only.
35
+
36
+ ### Instruction:
37
+ {}
38
+
39
+ ### Input:
40
+ {}
41
+
42
+ ### Response:
43
+ {}"""
44
+
45
+ past_prompts = []
46
+
47
+ class Query(BaseModel):
48
+ question: str
49
+
50
+ def get_answer(question: str) -> str:
51
+ if len(past_prompts) >= 10:
52
+ past_prompts.pop(0)
53
+
54
+ past_prompts.append(f"User: {question}")
55
+
56
+ inputs = tokenizer(
57
+ [
58
+ FAST_prompt_context.format(
59
+ past_prompts,
60
+ question,
61
+ "",
62
+ )
63
+ ], return_tensors="pt").to(device)
64
+
65
+ response = model.generate(**inputs, max_new_tokens=128)
66
+
67
+ response_text = tokenizer.decode(response[0], skip_special_tokens=True)
68
+
69
+ past_prompts.append(question)
70
+ past_prompts.append(response_text.split('Response:')[1].split('### Input:')[0])
71
+
72
+ return response_text.split('Response:')[1].split('### Input:')[0]
73
+
74
+ @app.post("/predict/")
75
+ async def predict(query: Query):
76
+ try:
77
+ answer = get_answer(query.question)
78
+ return {"answer": answer}
79
+ except Exception as e:
80
+ raise HTTPException(status_code=500, detail=str(e))
81
 
82
  if __name__ == "__main__":
83
+ import uvicorn
84
+ uvicorn.run(app, host="0.0.0.0", port=8000)