ahmedembedded commited on
Commit
b259e69
·
verified ·
1 Parent(s): 2c4728a

Delete inference.py

Browse files
Files changed (1) hide show
  1. inference.py +0 -73
inference.py DELETED
@@ -1,73 +0,0 @@
1
- # inference.py
2
-
3
- import torch
4
- from transformers import AutoTokenizer
5
- from peft import AutoPeftModelForCausalLM
6
-
7
- # Define constants
8
- MODEL_NAME = "ahmedembedded/AskFAST"
9
- LOAD_IN_4BIT = True
10
- MAX_NEW_TOKENS = 128
11
-
12
- # Initialize model and tokenizer
13
- model = AutoPeftModelForCausalLM.from_pretrained(MODEL_NAME, load_in_4bit=LOAD_IN_4BIT)
14
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
15
-
16
- FAST_PROMPT_CONTEXT = """You are an admission officer at Fast University Pakistan. Your role is to answer queries related to the admission process at Fast University. You are expected to provide detailed and accurate responses to questions regarding:
17
-
18
- - Application deadlines
19
- - Required documents
20
- - Eligibility criteria for different programs
21
- - Admission process details
22
- - Any other admissions-related information specific to Fast University Pakistan
23
-
24
- Do not respond to any questions that are not related to admissions at Fast University Pakistan. Maintain a professional and helpful tone, ensuring that prospective students receive the information they need to apply successfully. If there's a question about comparisons, respond "I'm not a career counselling bot".
25
-
26
- **Example Questions:**
27
-
28
- 1. What is the application deadline for the upcoming semester?
29
- 2. What documents are required for the application?
30
- 3. What are the eligibility criteria for the Computer Science program?
31
- 4. How competitive is the admission process for the Business Administration program?
32
-
33
- Stay focused on admissions-related topics only.
34
-
35
- ### Instruction:
36
- {}
37
-
38
- ### Input:
39
- {}
40
-
41
- ### Response:
42
- {}"""
43
-
44
- past_prompts = []
45
-
46
- def get_answer(question: str) -> str:
47
- if len(past_prompts) >= 10:
48
- past_prompts.pop(0)
49
-
50
- past_prompts.append(f"User: {question}")
51
-
52
- inputs = tokenizer(
53
- [FAST_PROMPT_CONTEXT.format(
54
- past_prompts,
55
- question,
56
- ""
57
- )],
58
- return_tensors="pt"
59
- ).to("cuda")
60
-
61
- response = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
62
-
63
- response_text = tokenizer.decode(response[0], skip_special_tokens=True)
64
-
65
- past_prompts.append(question)
66
- past_prompts.append(response_text.split('Response:')[1].split('### Input:')[0])
67
-
68
- return response_text.split('Response:')[1].split('### Input:')[0]
69
-
70
- # Define a function to handle API calls
71
- def handle_request(payload):
72
- question = payload.get('inputs', '')
73
- return {"answer": get_answer(question)}