Youssefk commited on
Commit
f3b738b
·
1 Parent(s): 065e2b1
Files changed (1) hide show
  1. app.py +99 -44
app.py CHANGED
@@ -1,54 +1,109 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
2
- import torch
3
- import streamlit as st
4
 
5
- st.write("im here")
6
- # Load the pre-trained tokenizer and model
7
- tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
8
- model = AutoModelForCausalLM.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
9
 
10
 
11
- dataa = "My name is youssef khemiri i am 21 years old and i am a data scientist"
12
- st.write(dataa)
13
 
14
- # Prepare the dataset
15
- train_dataset = TextDataset(
16
- tokenizer=tokenizer,
17
- file_path=dataa,
18
- block_size=128,
19
- )
20
- st.write("hi1")
21
- # Prepare the data collator
22
- data_collator = DataCollatorForLanguageModeling(
23
- tokenizer=tokenizer, mlm=False,
24
- )
25
 
26
- # Initialize the trainer
27
- training_args = TrainingArguments(
28
- output_dir='./results', # output directory
29
- num_train_epochs=3, # total number of training epochs
30
- per_device_train_batch_size=16, # batch size per device during training
31
- save_steps=10_000, # number of steps between saving checkpoints
32
- save_total_limit=2, # limit the total amount of checkpoints to save
33
- prediction_loss_only=True,
34
- learning_rate=5e-5,
35
- )
36
- st.write("hi2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- trainer = Trainer(
39
- model=model,
40
- args=training_args,
41
- train_dataset=train_dataset,
42
- data_collator=data_collator,
 
 
 
43
  )
44
 
45
- # Fine-tune the model
46
- trainer.train()
47
- st.write("finished training")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- # infer
50
- inputs = tokenizer("<human>: Tell me about youssef khemiri\n<bot>:", return_tensors='pt').to(model.device)
51
- outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
52
- output_str = tokenizer.decode(outputs[0])
53
- st.write(output_str)
54
 
 
1
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
2
+ # import torch
3
+ # import streamlit as st
4
 
5
+ # st.write("im here")
6
+ # # Load the pre-trained tokenizer and model
7
+ # tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
8
+ # model = AutoModelForCausalLM.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
9
 
10
 
11
+ # dataa = "My name is youssef khemiri i am 21 years old and i am a data scientist"
12
+ # st.write(dataa)
13
 
14
+ # # Prepare the dataset
15
+ # train_dataset = TextDataset(
16
+ # tokenizer=tokenizer,
17
+ # file_path=dataa,
18
+ # block_size=128,
19
+ # )
20
+ # st.write("hi1")
21
+ # # Prepare the data collator
22
+ # data_collator = DataCollatorForLanguageModeling(
23
+ # tokenizer=tokenizer, mlm=False,
24
+ # )
25
 
26
+ # # Initialize the trainer
27
+ # training_args = TrainingArguments(
28
+ # output_dir='./results', # output directory
29
+ # num_train_epochs=3, # total number of training epochs
30
+ # per_device_train_batch_size=16, # batch size per device during training
31
+ # save_steps=10_000, # number of steps between saving checkpoints
32
+ # save_total_limit=2, # limit the total amount of checkpoints to save
33
+ # prediction_loss_only=True,
34
+ # learning_rate=5e-5,
35
+ # )
36
+ # st.write("hi2")
37
+
38
+ # trainer = Trainer(
39
+ # model=model,
40
+ # args=training_args,
41
+ # train_dataset=train_dataset,
42
+ # data_collator=data_collator,
43
+ # )
44
+
45
+ # # Fine-tune the model
46
+ # trainer.train()
47
+ # st.write("finished training")
48
+
49
+ # # infer
50
+ # inputs = tokenizer("<human>: Tell me about youssef khemiri\n<bot>:", return_tensors='pt').to(model.device)
51
+ # outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
52
+ # output_str = tokenizer.decode(outputs[0])
53
+ # st.write(output_str)
54
 
55
+
56
+ from transformers import pipeline
57
+ import streamlit as st
58
+ from streamlit_chat import message
59
+
60
# Page chrome. NOTE: st.set_page_config must be the first Streamlit call
# in the script, so it stays up here right after the imports.
_page_config = {
    "page_title": "WLS",
    "page_icon": ":robot:",
}
st.set_page_config(**_page_config)
64
 
65
st.header("We Love Sousse ChatBot")


# Streamlit re-executes this whole script on every widget interaction, so an
# unguarded `pipeline(...)` call would reload BERT-large from disk on every
# keystroke/rerun. Cache the pipeline so it is built exactly once per process.
# NOTE(review): @st.cache_resource requires streamlit >= 1.18; on older
# versions use @st.cache(allow_output_mutation=True) instead.
@st.cache_resource
def _load_qa_model():
    """Load the extractive question-answering pipeline (built once, reused)."""
    return pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad')


qa_model = _load_qa_model()

# Fixed knowledge base: every user question is answered extractively from
# this context paragraph (the model copies a span out of it).
context = """"We Love Sousse" is a humanitarian club that is dedicated to helping those in need in the city of Sousse, Tunisia. The club is made up of passionate individuals who are committed to making a positive impact on their community by engaging in various activities that support the well-being of others.

One of the main goals of the "We Love Sousse" club is to promote humanitarianism by encouraging its members and the community at large to engage in volunteer work, charitable donations, and other forms of support for those who are struggling. The club is committed to creating a culture of compassion and kindness in Sousse, where people are always looking out for one another and working together to build a stronger and more resilient community.

The "We Love Sousse" club engages in a wide range of activities to achieve its goals. Some of these activities include:

Community Service: The club organizes regular volunteer work in various locations throughout Sousse, including hospitals, orphanages, and elderly care facilities. Members of the club spend time with the residents, provide them with assistance and support, and help to improve their living conditions.

Fundraising: The club raises funds through various activities, such as charity events, auctions, and crowdfunding campaigns. The money raised is used to support various humanitarian causes in the city, such as providing food, clothing, and shelter to those in need.

Awareness Campaigns: The club runs awareness campaigns to educate the community about various social issues affecting the city. These campaigns cover topics such as homelessness, poverty, healthcare, and education, and aim to inspire people to take action and make a positive difference in the lives of others.

Donations: The club collects donations of food, clothing, and other essential items from members of the community and distributes them to those in need. The club also organizes toy drives during the holiday season to provide gifts to children who might otherwise go without.

Overall, the "We Love Sousse" club is committed to making a positive difference in the lives of others in Sousse through its various humanitarian activities. The club's goal is to create a more compassionate, empathetic, and supportive community where everyone has the opportunity to thrive. """
87
+
88
# Conversation buffers survive Streamlit reruns via session_state:
# 'generated' holds bot replies, 'past' holds the user's inputs.
for _history_key in ('generated', 'past'):
    if _history_key not in st.session_state:
        st.session_state[_history_key] = []
93
+
94
def get_text():
    """Render the chat text box and return the user's current input string."""
    # The default value is a single space; keyed so Streamlit preserves it
    # across reruns.
    return st.text_input("You: ", " ", key="input")
97
+
98
# Greet the user, read their question, answer it from the fixed context,
# and render the running conversation.
message("Hello, How can I help you?")

question = get_text()

# Only query the model when the user actually typed something: the text input
# defaults to a single space, so an unguarded call would run (and fail or
# produce junk for) a blank question on every rerun.
if question and question.strip():
    answer = qa_model(question=question, context=context)
    # Record the exchange in the history buffers initialized above.
    # qa_model returns a dict; the extracted span lives under 'answer'.
    st.session_state['past'].append(question)
    st.session_state['generated'].append(answer['answer'])

# Display the conversation (oldest first). The original code computed the
# answer but never showed it — only a commented-out print().
for i in range(len(st.session_state['generated'])):
    message(st.session_state['past'][i], is_user=True, key=f"{i}_user")
    message(st.session_state['generated'][i], key=str(i))
 
 
 
109