Upload aglab.py
Browse files
aglab.py
CHANGED
|
@@ -1,8 +1,66 @@
|
|
| 1 |
from model import AIModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
class AgLab:
|
| 4 |
def __init__(self, system_prompt: str = ""):
|
| 5 |
self.system_prompt = system_prompt
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def __raw_ask(self, prompt: str) -> str:
|
| 8 |
'''
|
|
@@ -39,7 +97,91 @@ class AgLab:
|
|
| 39 |
full_prompt = " Summarize the following text: " + text + " send only the summary of the previous text, do not reply, send only the summary."
|
| 40 |
response = self.__raw_ask(full_prompt)
|
| 41 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if __name__ == "__main__":
|
| 44 |
aglab = AgLab("You are a helpful assistant called ag lab llm.")
|
| 45 |
print(aglab.AskAgLabLLM("What is the capital of France, also what is your name?"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from model import AIModel
|
| 2 |
+
import random
|
| 3 |
+
import os
|
| 4 |
+
import requests
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
+
|
| 7 |
+
def fetch_wikipedia_text(title):
    """Fetch the readable text of one Wikipedia article.

    Args:
        title: Article title in URL form (underscores, e.g. "Machine_learning").

    Returns:
        The article's visible text extracted from its HTML, or "" on any
        failure (non-200 response or a network error).
    """
    url = f"https://en.wikipedia.org/api/rest_v1/page/html/{title}"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # timeout keeps one stalled connection from hanging the whole
        # collection run (the original call had no timeout at all).
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        # Follow the function's existing best-effort convention instead of
        # crashing the batch on a transient network error.
        print(f"Failed to fetch '{title}'")
        return ""
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()
    else:
        print(f"Failed to fetch '{title}'")
        return ""
|
| 17 |
+
|
| 18 |
+
def collect_wikipedia_data(titles, min_chars=25000):
    """Concatenate article texts until the total reaches *min_chars*.

    Args:
        titles: Iterable of article titles to fetch, in order.
        min_chars: Stop fetching once the accumulated text is at least this
            many characters (default 25000).

    Returns:
        The fetched articles joined together, each preceded by a
        "=== Title ===" section header.
    """
    # Accumulate pieces in a list and join once at the end; repeated
    # `all_text += ...` is quadratic in the total text size.
    parts = []
    total = 0
    for title in titles:
        print(f"Fetching: {title}")
        text = fetch_wikipedia_text(title)
        piece = f"\n\n=== {title.replace('_', ' ')} ===\n\n{text}"
        parts.append(piece)
        total += len(piece)
        if total >= min_chars:
            break
    return ''.join(parts)
|
| 27 |
+
|
| 28 |
+
# Location of the cached Wikipedia text corpus used by AgLab's local
# (offline) text utilities.
file_path = 'agailab/data.txt'

# Build the corpus once; on later runs the cached file is reused as-is.
if not os.path.exists(file_path):
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # AI/CS-themed articles; fetching stops early once the minimum corpus
    # size is reached, so later titles may never be downloaded.
    article_titles = [
        "Artificial_intelligence", "Machine_learning", "Deep_learning", "Neural_network",
        "Natural_language_processing", "Computer_vision", "Reinforcement_learning",
        "Supervised_learning", "Unsupervised_learning", "Turing_test", "ChatGPT",
        "Large_language_model", "OpenAI", "Automation", "Data_science", "AI_ethics",
        "Robotics", "Cognitive_computing", "Computer_science", "Algorithm",
        "Big_data", "Pattern_recognition", "Knowledge_representation", "Expert_system", "Intelligent_agent"
    ]

    text_data = collect_wikipedia_data(article_titles)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text_data)

    print(f"Saved {len(text_data)} characters of Wikipedia text to {file_path}")
else:
    print("File already exists.")


# Read the corpus once and derive both module-level views from it.
# (The original opened and read the same file twice in a row.)
with open(file_path, 'r', encoding='utf-8') as f:
    corpus = f.read()
words = corpus.split()
|
| 57 |
|
| 58 |
class AgLab:
|
| 59 |
    def __init__(self, system_prompt: str = "") -> None:
        """Create an AgLab helper.

        Args:
            system_prompt: Instruction text associated with this instance's
                LLM requests.
        """
        self.system_prompt = system_prompt
        # Load the local text corpus that backs the offline utilities
        # (RandomTextSequence, PredictNextWord, GenerateLocalText).
        # NOTE(review): this re-reads the same file the module top level
        # already loaded into the global `corpus`/`words` — presumably for
        # per-instance isolation; confirm the duplicate read is intended.
        with open(file_path, 'r', encoding='utf-8') as f:
            self.corpus = f.read()
        self.words = self.corpus.split()
|
| 64 |
|
| 65 |
def __raw_ask(self, prompt: str) -> str:
|
| 66 |
'''
|
|
|
|
| 97 |
full_prompt = " Summarize the following text: " + text + " send only the summary of the previous text, do not reply, send only the summary."
|
| 98 |
response = self.__raw_ask(full_prompt)
|
| 99 |
return response
|
| 100 |
+
|
| 101 |
+
def TurnToBulletPoints(self, text: str) -> str:
|
| 102 |
+
'''
|
| 103 |
+
Turn a text into bullet points using the AgLab LLM and get a response as text.
|
| 104 |
+
'''
|
| 105 |
+
full_prompt = " Turn the following text into bullet points: " + text + " send only the bullet points of the previous text, do not reply, send only the bullet points."
|
| 106 |
+
response = self.__raw_ask(full_prompt)
|
| 107 |
+
return response
|
| 108 |
+
|
| 109 |
+
    def RandomEmojiSequence(self, length: int = 5) -> str:
        '''
        Generate a random sequence of emojis.

        Args:
            length: Number of emojis to sample (with replacement).

        Returns:
            The sampled emojis concatenated with no separator.
        '''
        # NOTE(review): the literals below appear mojibake-garbled (UTF-8
        # emoji bytes decoded under a wrong encoding in the copy at hand) —
        # verify against the original file that these are the intended
        # emoji characters before relying on the output.
        emojis = [
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐ฟ", "๐ป", "๐",
            "๐ค", "๐ฝ", "๐พ", "๐", "๐บ", "๐ธ", "๐น", "๐ป", "๐ผ", "๐ฝ",
            "๐", "๐ฟ", "๐พ", "๐ถ", "๐ฑ", "๐ญ", "๐น", "๐ฐ", "๐ฆ", "๐ป",
            "๐ผ", "๐จ", "๐ฏ", "๐ฆ", "๐ฎ", "๐ท", "๐ฝ", "๐ธ", "๐ต", "๐",
            "๐", "๐", "๐", "๐", "๐ง", "๐ฆ", "๐ค", "๐ฃ", "๐ฅ", "๐ฆ",
            "๐ฆ", "๐ฆ", "๐ฆ", "๐บ", "๐", "๐ด", "๐ฆ", "๐", "๐", "๐ฆ",
            "๐", "๐", "๐", "๐ชฒ", "๐ชณ", "๐ท", "๐ธ", "๐ข", "๐", "๐ฆ",
            "๐ฆ", "๐ฆ", "๐ฆ", "๐ฆ", "๐ฆ", "๐", "๐ชธ", "๐ ", "๐", "๐ก",
            "๐ฆ", "๐ฌ", "๐ณ", "๐", "๐ฆญ", "๐", "๐ฆง", "๐ฆ", "๐ฆฃ", "๐",
            "๐ฆ", "๐ฆ", "๐ช", "๐ซ", "๐ฆ", "๐ฆ", "๐ฆฌ", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐ฆ", "๐", "๐ฆ", "๐", "๐ฉ", "๐ฆฎ",
            "๐โ๐ฆบ", "๐", "๐โโฌ", "๐ชถ", "๐", "๐ฆ", "๐ฆค", "๐ฆ", "๐ฆ",
            "๐ฆข", "๐ฆฉ", "๐", "๐", "๐ฆ", "๐ฆจ", "๐ฆก", "๐ฆซ", "๐ฆฆ", "๐ฆฅ",
            "๐", "๐", "๐ฟ", "๐ฆ", "๐พ", "๐", "๐ฒ", "๐ต", "๐", "๐ฒ",
            "๐ณ", "๐ด", "๐ชต", "๐ฑ", "๐ฟ", "โ", "๐", "๐", "๐ชด", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐ชจ", "๐พ", "๐", "๐ท", "๐น",
            "๐ฅ", "๐บ", "๐ธ", "๐ผ", "๐ป", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐ช", "๐ซ", "โญ", "๐", "โจ", "โก", "โ", "๐ฅ",
            "๐ฅ", "๐ช", "๐", "โ", "๐ค", "โ", "๐ฅ", "โ", "๐ฆ", "๐ง",
            "โ", "๐ฉ", "๐จ", "โ", "โ", "โ", "๐ฌ", "๐จ", "๐ง", "๐ฆ",
            "โ", "โ", "๐", "๐ซ"
        ]
        return ''.join(random.choice(emojis) for _ in range(length))
|
| 139 |
+
|
| 140 |
+
def RandomTextSequence(self, length: int = 5) -> str:
|
| 141 |
+
'''
|
| 142 |
+
Generate a random sequence of words.
|
| 143 |
+
'''
|
| 144 |
+
return ' '.join(random.choice(words) for _ in range(length))
|
| 145 |
+
|
| 146 |
+
def PredictNextWord(self, text_string: str) -> str:
|
| 147 |
+
'''
|
| 148 |
+
Predict the next word in a given text string using a local LLM approach.
|
| 149 |
+
It finds the longest matching suffix in the corpus and returns the next word.
|
| 150 |
+
'''
|
| 151 |
+
text_words = text_string.split()
|
| 152 |
+
if not text_words:
|
| 153 |
+
return ""
|
| 154 |
|
| 155 |
+
for i in range(len(text_words), 0, -1):
|
| 156 |
+
sequence = ' '.join(text_words[-i:])
|
| 157 |
+
pattern = f"{sequence} "
|
| 158 |
+
start_index = self.corpus.find(pattern)
|
| 159 |
+
if start_index != -1:
|
| 160 |
+
end_index = start_index + len(pattern)
|
| 161 |
+
remaining = self.corpus[end_index:].strip()
|
| 162 |
+
if remaining:
|
| 163 |
+
return remaining.split()[0]
|
| 164 |
+
|
| 165 |
+
return "<|endoftext|>"
|
| 166 |
+
|
| 167 |
+
def GenerateLocalText(self, text_string: str, length=10) -> str:
|
| 168 |
+
'''
|
| 169 |
+
Generate text based on a given text string using a local LLM approach.
|
| 170 |
+
It finds the longest matching suffix in the corpus and returns the next words.
|
| 171 |
+
'''
|
| 172 |
+
for i in range(length):
|
| 173 |
+
next_word = self.PredictNextWord(text_string)
|
| 174 |
+
if next_word == "<|endoftext|>":
|
| 175 |
+
break
|
| 176 |
+
text_string += " " + next_word
|
| 177 |
+
return text_string
|
| 178 |
+
|
| 179 |
if __name__ == "__main__":
    # Manual smoke test: exercises the remote-LLM helpers (AskAgLabLLM,
    # SummarizeText, TurnToBulletPoints) and the local corpus-based
    # utilities (RandomEmojiSequence, RandomTextSequence, PredictNextWord,
    # GenerateLocalText).
    aglab = AgLab("You are a helpful assistant called ag lab llm.")
    print(aglab.AskAgLabLLM("What is the capital of France, also what is your name?"))
    print(aglab.RandomEmojiSequence(10))
    print(aglab.SummarizeText("The quick brown fox jumps over the lazy dog."))
    print(aglab.TurnToBulletPoints("The quick brown fox jumps over the lazy dog."))
    print(aglab.RandomTextSequence(10))
    print(aglab.PredictNextWord("Artificial"))
    print(aglab.GenerateLocalText("Artificial", 10))
|