Harshithacj123 commited on
Commit
e2f9956
·
verified ·
1 Parent(s): 41a3356

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Model / pipeline setup ----------------------------------------------
# All imports consolidated at the top (the original interleaved
# `from transformers import pipeline` between statements).
from transformers import AutoTokenizer, pipeline
import transformers
import torch
import gradio as gr

# Hub id of the fine-tuned chat model (derived from meta-llama/Llama-2-7b-chat-hf).
model = "Harshithacj123/CCU_Llama_7b_chat_test"  # meta-llama/Llama-2-7b-chat-hf

# Tokenizer matching the model checkpoint (supplies eos_token_id for generation).
tokenizer = AutoTokenizer.from_pretrained(model)

# Text-generation pipeline: fp16 weights, layers auto-placed across available
# devices by accelerate (`device_map="auto"`).
llama_pipeline = pipeline(
    "text-generation",  # LLM task
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
19
+
20
# Llama-2 chat-format system prompt (<s>[INST] <<SYS>> ... <</SYS>> framing).
# NOTE(review): defined but never referenced — format_message() builds its own
# prompt without prepending this, so the system instruction is currently
# unused at runtime; confirm whether it should be included in the prompt.
SYSTEM_PROMPT = """<s>[INST] <<SYS>>
You are a helpful bot. Your answers are clear and concise.
<</SYS>>

"""
25
+
26
+ # Formatting function for message and history
27
def format_message(message: str) -> str:
    """Wrap *message* in the Llama-2 ``[INST] ... [/INST]`` instruction markers.

    Parameters:
        message (str): Raw user input.

    Returns:
        str: The delimited prompt string passed to the generation pipeline.
    """
    return "".join(("<s>[INST] ", message, " [/INST]"))
32
+
33
+ # Generate a response from the Llama model
34
# Generate a response from the Llama model
def get_llama_response(message: str) -> str:
    """Generate a conversational response from the Llama pipeline.

    Parameters:
        message (str): User's input message.

    Returns:
        str: Generated reply with the echoed prompt removed and surrounding
        whitespace stripped.
    """
    query = format_message(message)

    sequences = llama_pipeline(
        query,
        do_sample=True,  # sample instead of greedy decoding
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,  # cap on total tokens (prompt + completion)
    )

    generated_text = sequences[0]["generated_text"]
    # Strip the echoed prompt only when it is actually present; the original
    # unconditional slice `generated_text[len(query):]` would corrupt the
    # output if the pipeline did not echo the query verbatim.
    if generated_text.startswith(query):
        generated_text = generated_text[len(query):]

    return generated_text.strip()
59
+
60
+
61
# Launch the Gradio chat UI. Guarded so importing this module (e.g. for tests)
# does not start a web server; Spaces executes app.py as __main__, so runtime
# behavior is unchanged.
if __name__ == "__main__":
    gr.ChatInterface(get_llama_response).launch()