Upload aglab.py
Browse files
aglab.py
CHANGED
|
@@ -1,8 +1,66 @@
|
|
| 1 |
from model import AIModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
class AgLab:
|
| 4 |
def __init__(self, system_prompt: str = ""):
|
| 5 |
self.system_prompt = system_prompt
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def __raw_ask(self, prompt: str) -> str:
|
| 8 |
'''
|
|
@@ -39,7 +97,91 @@ class AgLab:
|
|
| 39 |
full_prompt = " Summarize the following text: " + text + " send only the summary of the previous text, do not reply, send only the summary."
|
| 40 |
response = self.__raw_ask(full_prompt)
|
| 41 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if __name__ == "__main__":
|
| 44 |
aglab = AgLab("You are a helpful assistant called ag lab llm.")
|
| 45 |
print(aglab.AskAgLabLLM("What is the capital of France, also what is your name?"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from model import AIModel
|
| 2 |
+
import random
|
| 3 |
+
import os
|
| 4 |
+
import requests
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
+
|
| 7 |
+
def fetch_wikipedia_text(title):
    """Fetch the readable text of one Wikipedia article.

    Args:
        title: Article title in URL form (underscores, e.g. "Machine_learning").

    Returns:
        The article's visible text extracted from its HTML, or "" on any
        failure (non-200 response or a network error).
    """
    url = f"https://en.wikipedia.org/api/rest_v1/page/html/{title}"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # timeout keeps one stalled connection from hanging the whole
        # collection run (the original call had no timeout at all).
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        # Follow the function's existing best-effort convention instead of
        # crashing the batch on a transient network error.
        print(f"Failed to fetch '{title}'")
        return ""
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()
    else:
        print(f"Failed to fetch '{title}'")
        return ""
|
| 17 |
+
|
| 18 |
+
def collect_wikipedia_data(titles, min_chars=25000):
    """Concatenate article texts until the total reaches *min_chars*.

    Args:
        titles: Iterable of article titles to fetch, in order.
        min_chars: Stop fetching once the accumulated text is at least this
            many characters (default 25000).

    Returns:
        The fetched articles joined together, each preceded by a
        "=== Title ===" section header.
    """
    # Accumulate pieces in a list and join once at the end; repeated
    # `all_text += ...` is quadratic in the total text size.
    parts = []
    total = 0
    for title in titles:
        print(f"Fetching: {title}")
        text = fetch_wikipedia_text(title)
        piece = f"\n\n=== {title.replace('_', ' ')} ===\n\n{text}"
        parts.append(piece)
        total += len(piece)
        if total >= min_chars:
            break
    return ''.join(parts)
|
| 27 |
+
|
| 28 |
+
# Location of the cached Wikipedia text corpus used by AgLab's local
# (offline) text utilities.
file_path = 'agailab/data.txt'

# Build the corpus once; on later runs the cached file is reused as-is.
if not os.path.exists(file_path):
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # AI/CS-themed articles; fetching stops early once the minimum corpus
    # size is reached, so later titles may never be downloaded.
    article_titles = [
        "Artificial_intelligence", "Machine_learning", "Deep_learning", "Neural_network",
        "Natural_language_processing", "Computer_vision", "Reinforcement_learning",
        "Supervised_learning", "Unsupervised_learning", "Turing_test", "ChatGPT",
        "Large_language_model", "OpenAI", "Automation", "Data_science", "AI_ethics",
        "Robotics", "Cognitive_computing", "Computer_science", "Algorithm",
        "Big_data", "Pattern_recognition", "Knowledge_representation", "Expert_system", "Intelligent_agent"
    ]

    text_data = collect_wikipedia_data(article_titles)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text_data)

    print(f"Saved {len(text_data)} characters of Wikipedia text to {file_path}")
else:
    print("File already exists.")


# Read the corpus once and derive both module-level views from it.
# (The original opened and read the same file twice in a row.)
with open(file_path, 'r', encoding='utf-8') as f:
    corpus = f.read()
words = corpus.split()
|
| 57 |
|
| 58 |
class AgLab:
|
| 59 |
    def __init__(self, system_prompt: str = "") -> None:
        """Create an AgLab helper.

        Args:
            system_prompt: Instruction text associated with this instance's
                LLM requests.
        """
        self.system_prompt = system_prompt
        # Load the local text corpus that backs the offline utilities
        # (RandomTextSequence, PredictNextWord, GenerateLocalText).
        # NOTE(review): this re-reads the same file the module top level
        # already loaded into the global `corpus`/`words` — presumably for
        # per-instance isolation; confirm the duplicate read is intended.
        with open(file_path, 'r', encoding='utf-8') as f:
            self.corpus = f.read()
        self.words = self.corpus.split()
|
| 64 |
|
| 65 |
def __raw_ask(self, prompt: str) -> str:
|
| 66 |
'''
|
|
|
|
| 97 |
full_prompt = " Summarize the following text: " + text + " send only the summary of the previous text, do not reply, send only the summary."
|
| 98 |
response = self.__raw_ask(full_prompt)
|
| 99 |
return response
|
| 100 |
+
|
| 101 |
+
def TurnToBulletPoints(self, text: str) -> str:
|
| 102 |
+
'''
|
| 103 |
+
Turn a text into bullet points using the AgLab LLM and get a response as text.
|
| 104 |
+
'''
|
| 105 |
+
full_prompt = " Turn the following text into bullet points: " + text + " send only the bullet points of the previous text, do not reply, send only the bullet points."
|
| 106 |
+
response = self.__raw_ask(full_prompt)
|
| 107 |
+
return response
|
| 108 |
+
|
| 109 |
+
    def RandomEmojiSequence(self, length: int = 5) -> str:
        '''
        Generate a random sequence of emojis.

        Args:
            length: Number of emojis to sample (with replacement).

        Returns:
            The sampled emojis concatenated with no separator.
        '''
        # NOTE(review): the literals below appear mojibake-garbled (UTF-8
        # emoji bytes decoded under a wrong encoding in the copy at hand) —
        # verify against the original file that these are the intended
        # emoji characters before relying on the output.
        emojis = [
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐ฟ", "๐ป", "๐",
            "๐ค", "๐ฝ", "๐พ", "๐", "๐บ", "๐ธ", "๐น", "๐ป", "๐ผ", "๐ฝ",
            "๐", "๐ฟ", "๐พ", "๐ถ", "๐ฑ", "๐ญ", "๐น", "๐ฐ", "๐ฆ", "๐ป",
            "๐ผ", "๐จ", "๐ฏ", "๐ฆ", "๐ฎ", "๐ท", "๐ฝ", "๐ธ", "๐ต", "๐",
            "๐", "๐", "๐", "๐", "๐ง", "๐ฆ", "๐ค", "๐ฃ", "๐ฅ", "๐ฆ",
            "๐ฆ", "๐ฆ", "๐ฆ", "๐บ", "๐", "๐ด", "๐ฆ", "๐", "๐", "๐ฆ",
            "๐", "๐", "๐", "๐ชฒ", "๐ชณ", "๐ท", "๐ธ", "๐ข", "๐", "๐ฆ",
            "๐ฆ", "๐ฆ", "๐ฆ", "๐ฆ", "๐ฆ", "๐", "๐ชธ", "๐ ", "๐", "๐ก",
            "๐ฆ", "๐ฌ", "๐ณ", "๐", "๐ฆญ", "๐", "๐ฆง", "๐ฆ", "๐ฆฃ", "๐",
            "๐ฆ", "๐ฆ", "๐ช", "๐ซ", "๐ฆ", "๐ฆ", "๐ฆฌ", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐ฆ", "๐", "๐ฆ", "๐", "๐ฉ", "๐ฆฎ",
            "๐โ๐ฆบ", "๐", "๐โโฌ", "๐ชถ", "๐", "๐ฆ", "๐ฆค", "๐ฆ", "๐ฆ",
            "๐ฆข", "๐ฆฉ", "๐", "๐", "๐ฆ", "๐ฆจ", "๐ฆก", "๐ฆซ", "๐ฆฆ", "๐ฆฅ",
            "๐", "๐", "๐ฟ", "๐ฆ", "๐พ", "๐", "๐ฒ", "๐ต", "๐", "๐ฒ",
            "๐ณ", "๐ด", "๐ชต", "๐ฑ", "๐ฟ", "โ", "๐", "๐", "๐ชด", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐ชจ", "๐พ", "๐", "๐ท", "๐น",
            "๐ฅ", "๐บ", "๐ธ", "๐ผ", "๐ป", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐", "๐",
            "๐", "๐", "๐ช", "๐ซ", "โญ", "๐", "โจ", "โก", "โ", "๐ฅ",
            "๐ฅ", "๐ช", "๐", "โ", "๐ค", "โ", "๐ฅ", "โ", "๐ฆ", "๐ง",
            "โ", "๐ฉ", "๐จ", "โ", "โ", "โ", "๐ฌ", "๐จ", "๐ง", "๐ฆ",
            "โ", "โ", "๐", "๐ซ"
        ]
        return ''.join(random.choice(emojis) for _ in range(length))
|
| 139 |
+
|
| 140 |
+
def RandomTextSequence(self, length: int = 5) -> str:
|
| 141 |
+
'''
|
| 142 |
+
Generate a random sequence of words.
|
| 143 |
+
'''
|
| 144 |
+
return ' '.join(random.choice(words) for _ in range(length))
|
| 145 |
+
|
| 146 |
+
def PredictNextWord(self, text_string: str) -> str:
|
| 147 |
+
'''
|
| 148 |
+
Predict the next word in a given text string using a local LLM approach.
|
| 149 |
+
It finds the longest matching suffix in the corpus and returns the next word.
|
| 150 |
+
'''
|
| 151 |
+
text_words = text_string.split()
|
| 152 |
+
if not text_words:
|
| 153 |
+
return ""
|
| 154 |
|
| 155 |
+
for i in range(len(text_words), 0, -1):
|
| 156 |
+
sequence = ' '.join(text_words[-i:])
|
| 157 |
+
pattern = f"{sequence} "
|
| 158 |
+
start_index = self.corpus.find(pattern)
|
| 159 |
+
if start_index != -1:
|
| 160 |
+
end_index = start_index + len(pattern)
|
| 161 |
+
remaining = self.corpus[end_index:].strip()
|
| 162 |
+
if remaining:
|
| 163 |
+
return remaining.split()[0]
|
| 164 |
+
|
| 165 |
+
return "<|endoftext|>"
|
| 166 |
+
|
| 167 |
+
def GenerateLocalText(self, text_string: str, length=10) -> str:
|
| 168 |
+
'''
|
| 169 |
+
Generate text based on a given text string using a local LLM approach.
|
| 170 |
+
It finds the longest matching suffix in the corpus and returns the next words.
|
| 171 |
+
'''
|
| 172 |
+
for i in range(length):
|
| 173 |
+
next_word = self.PredictNextWord(text_string)
|
| 174 |
+
if next_word == "<|endoftext|>":
|
| 175 |
+
break
|
| 176 |
+
text_string += " " + next_word
|
| 177 |
+
return text_string
|
| 178 |
+
|
| 179 |
if __name__ == "__main__":
    # Manual smoke test: exercises the remote-LLM helpers (AskAgLabLLM,
    # SummarizeText, TurnToBulletPoints) and the local corpus-based
    # utilities (RandomEmojiSequence, RandomTextSequence, PredictNextWord,
    # GenerateLocalText).
    aglab = AgLab("You are a helpful assistant called ag lab llm.")
    print(aglab.AskAgLabLLM("What is the capital of France, also what is your name?"))
    print(aglab.RandomEmojiSequence(10))
    print(aglab.SummarizeText("The quick brown fox jumps over the lazy dog."))
    print(aglab.TurnToBulletPoints("The quick brown fox jumps over the lazy dog."))
    print(aglab.RandomTextSequence(10))
    print(aglab.PredictNextWord("Artificial"))
    print(aglab.GenerateLocalText("Artificial", 10))
|