AGofficial committed on
Commit e5e4fa5 · verified · 1 Parent(s): 6f0338c

Upload aglab.py

Files changed (1)
  1. aglab.py +142 -0
aglab.py CHANGED
@@ -1,8 +1,66 @@
 from model import AIModel
+import random
+import os
+import requests
+from bs4 import BeautifulSoup
+
+def fetch_wikipedia_text(title):
+    url = f"https://en.wikipedia.org/api/rest_v1/page/html/{title}"
+    headers = {"User-Agent": "Mozilla/5.0"}
+    response = requests.get(url, headers=headers)
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.content, "html.parser")
+        return soup.get_text()
+    else:
+        print(f"Failed to fetch '{title}'")
+        return ""
+
+def collect_wikipedia_data(titles, min_chars=25000):
+    all_text = ""
+    for title in titles:
+        print(f"Fetching: {title}")
+        text = fetch_wikipedia_text(title)
+        all_text += f"\n\n=== {title.replace('_', ' ')} ===\n\n{text}"
+        if len(all_text) >= min_chars:
+            break
+    return all_text
+
+file_path = 'agailab/data.txt'
+
+if not os.path.exists(file_path):
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    article_titles = [
+        "Artificial_intelligence", "Machine_learning", "Deep_learning", "Neural_network",
+        "Natural_language_processing", "Computer_vision", "Reinforcement_learning",
+        "Supervised_learning", "Unsupervised_learning", "Turing_test", "ChatGPT",
+        "Large_language_model", "OpenAI", "Automation", "Data_science", "AI_ethics",
+        "Robotics", "Cognitive_computing", "Computer_science", "Algorithm",
+        "Big_data", "Pattern_recognition", "Knowledge_representation", "Expert_system", "Intelligent_agent"
+    ]
+
+    text_data = collect_wikipedia_data(article_titles)
+
+    with open(file_path, 'w', encoding='utf-8') as f:
+        f.write(text_data)
+
+    print(f"Saved {len(text_data)} characters of Wikipedia text to {file_path}")
+else:
+    print("File already exists.")
+
+
+with open(file_path, 'r', encoding='utf-8') as f:
+    words = f.read().split()
+
+with open(file_path, 'r', encoding='utf-8') as f:
+    corpus = f.read()
 
 class AgLab:
     def __init__(self, system_prompt: str = ""):
         self.system_prompt = system_prompt
+        with open(file_path, 'r', encoding='utf-8') as f:
+            self.corpus = f.read()
+        self.words = self.corpus.split()
 
     def __raw_ask(self, prompt: str) -> str:
         '''
@@ -39,7 +97,91 @@ class AgLab:
         full_prompt = " Summarize the following text: " + text + " send only the summary of the previous text, do not reply, send only the summary."
         response = self.__raw_ask(full_prompt)
         return response
+
+    def TurnToBulletPoints(self, text: str) -> str:
+        '''
+        Turn a text into bullet points using the AgLab LLM and get a response as text.
+        '''
+        full_prompt = " Turn the following text into bullet points: " + text + " send only the bullet points of the previous text, do not reply, send only the bullet points."
+        response = self.__raw_ask(full_prompt)
+        return response
+
+    def RandomEmojiSequence(self, length: int = 5) -> str:
+        '''
+        Generate a random sequence of emojis.
+        '''
+        emojis = [
+            "😀", "😂", "😃", "😄", "😅", "😆", "😉", "😊", "😋", "😎",
+            "😍", "😘", "😗", "😙", "😚", "😇", "😈", "👿", "👻", "💀",
+            "🤖", "👽", "👾", "🎃", "😺", "😸", "😹", "😻", "😼", "😽",
+            "🙀", "😿", "😾", "🐶", "🐱", "🐭", "🐹", "🐰", "🦊", "🐻",
+            "🐼", "🐨", "🐯", "🦁", "🐮", "🐷", "🐽", "🐸", "🐵", "🙈",
+            "🙉", "🙊", "🐒", "🐔", "🐧", "🐦", "🐤", "🐣", "🐥", "🦆",
+            "🦅", "🦉", "🦇", "🐺", "🐗", "🐴", "🦄", "🐝", "🐛", "🦋",
+            "🐌", "🐞", "🐜", "🪲", "🪳", "🕷", "🕸", "🐢", "🐍", "🦎",
+            "🦂", "🦀", "🦞", "🦐", "🦑", "🐙", "🪸", "🐠", "🐟", "🐡",
+            "🦈", "🐬", "🐳", "🐋", "🦭", "🐊", "🦧", "🦍", "🦣", "🐘",
+            "🦏", "🦛", "🐪", "🐫", "🦒", "🦘", "🦬", "🐃", "🐂", "🐄",
+            "🐎", "🐖", "🐏", "🐑", "🦙", "🐐", "🦌", "🐕", "🐩", "🦮",
+            "🐕‍🦺", "🐈", "🐈‍⬛", "🪶", "🐓", "🦃", "🦤", "🦚", "🦜",
+            "🦢", "🦩", "🕊", "🐇", "🦝", "🦨", "🦡", "🦫", "🦦", "🦥",
+            "🐁", "🐀", "🐿", "🦔", "🐾", "🐉", "🐲", "🌵", "🎄", "🌲",
+            "🌳", "🌴", "🪵", "🌱", "🌿", "☘", "🍀", "🎍", "🪴", "🎋",
+            "🍃", "🍂", "🍁", "🍄", "🐚", "🪨", "🌾", "💐", "🌷", "🌹",
+            "🥀", "🌺", "🌸", "🌼", "🌻", "🌞", "🌝", "🌛", "🌜", "🌚",
+            "🌕", "🌖", "🌗", "🌘", "🌑", "🌒", "🌓", "🌔", "🌙", "🌎",
+            "🌍", "🌏", "🪐", "💫", "⭐", "🌟", "✨", "⚡", "☄", "💥",
+            "🔥", "🌪", "🌈", "☀", "🌤", "⛅", "🌥", "☁", "🌦", "🌧",
+            "⛈", "🌩", "🌨", "❄", "☃", "⛄", "🌬", "💨", "💧", "💦",
+            "☔", "☂", "🌊", "🌫"
+        ]
+        return ''.join(random.choice(emojis) for _ in range(length))
+
+    def RandomTextSequence(self, length: int = 5) -> str:
+        '''
+        Generate a random sequence of words drawn from the corpus.
+        '''
+        return ' '.join(random.choice(self.words) for _ in range(length))
+
+    def PredictNextWord(self, text_string: str) -> str:
+        '''
+        Predict the next word for a given text string by direct corpus lookup.
+        It finds the longest matching suffix in the corpus and returns the next word.
+        '''
+        text_words = text_string.split()
+        if not text_words:
+            return ""
 
+        for i in range(len(text_words), 0, -1):
+            sequence = ' '.join(text_words[-i:])
+            pattern = f"{sequence} "
+            start_index = self.corpus.find(pattern)
+            if start_index != -1:
+                end_index = start_index + len(pattern)
+                remaining = self.corpus[end_index:].strip()
+                if remaining:
+                    return remaining.split()[0]
+
+        return "<|endoftext|>"
+
+    def GenerateLocalText(self, text_string: str, length=10) -> str:
+        '''
+        Generate text from a given text string using the local corpus-lookup approach.
+        It repeatedly predicts the next word and appends it, up to `length` words.
+        '''
+        for _ in range(length):
+            next_word = self.PredictNextWord(text_string)
+            if next_word == "<|endoftext|>":
+                break
+            text_string += " " + next_word
+        return text_string
+
 if __name__ == "__main__":
     aglab = AgLab("You are a helpful assistant called ag lab llm.")
     print(aglab.AskAgLabLLM("What is the capital of France, also what is your name?"))
+    print(aglab.RandomEmojiSequence(10))
+    print(aglab.SummarizeText("The quick brown fox jumps over the lazy dog."))
+    print(aglab.TurnToBulletPoints("The quick brown fox jumps over the lazy dog."))
+    print(aglab.RandomTextSequence(10))
+    print(aglab.PredictNextWord("Artificial"))
+    print(aglab.GenerateLocalText("Artificial", 10))
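
A note on the local generation path added in this commit: PredictNextWord is not a trained model but a greedy longest-suffix lookup. It scans the corpus for the longest suffix of the prompt followed by a space and returns whatever word comes next, so generation is deterministic and always follows the first occurrence in the corpus. A minimal standalone sketch of the same idea on a hypothetical toy corpus (the corpus string and function name below are illustrative, not part of the commit):

# Minimal sketch of the longest-suffix lookup behind PredictNextWord.
# The toy corpus and helper name are illustrative assumptions, not part of aglab.py.
corpus = "the cat sat on the mat . the cat ran away ."

def predict_next_word(text: str, corpus: str) -> str:
    words = text.split()
    # Try the longest suffix of the input first, then progressively shorter ones.
    for i in range(len(words), 0, -1):
        pattern = ' '.join(words[-i:]) + ' '
        idx = corpus.find(pattern)
        if idx != -1:
            rest = corpus[idx + len(pattern):].strip()
            if rest:
                return rest.split()[0]
    return "<|endoftext|>"

print(predict_next_word("the cat", corpus))  # 'sat' -- first occurrence wins
print(predict_next_word("cat ran", corpus))  # 'away'
print(predict_next_word("dog", corpus))      # '<|endoftext|>' -- no match

Because str.find always returns the first match, repeated generation from the same prompt retraces the same continuation; choosing randomly or by frequency among all matches would be the natural next step if more varied output were wanted.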