File size: 2,706 Bytes
b67629f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
import tiktoken

import re

def token_length(text):
    """
    Approximate the token count of a string.

    Splits the text on whitespace to count words, then scales the word
    count by 1.25 (a rough words-to-tokens ratio for English text).

    Args:
        text (str): The input string.

    Returns:
        int: The approximate number of tokens in the input string.
    """
    # str.split() with no arguments splits on any run of whitespace and
    # discards empty strings, so leading/trailing space is handled for free.
    num_words = len(text.split())
    # Scale by 1.25 to estimate tokens from words; truncate to an int.
    return int(num_words * 1.25)

def LLM_COMPLETION(prompt, context_size=None, preset='Divine Intellect', repetition_penalty=1.17, temperature = 1.31, top_p = 0.14, api_port=5000, guidance_scale = 1, negative_prompt = ""):
    """
    Call an OpenAI-style /v1/completions endpoint (text-generation-webui).

    Sends `prompt` to a locally hosted text-generation-webui instance and
    returns the generated completion text.

    Args:
        prompt (str): The prompt to complete.
        context_size (int | None): Model context window; defaults to 4096
            when None.
        preset (str): Generation preset name; when not 'None', the server
            uses presets/<preset>.yaml instead of the individual sampler
            values below.
        repetition_penalty (float): Repetition penalty sampler value.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus-sampling cutoff.
        api_port (int): Local port the API is served on.
        guidance_scale (float): Classifier-free guidance scale.
        negative_prompt (str): Negative prompt for CFG.

    Returns:
        str | None: The generated text, or None if the request failed
        (the HTTP status code is printed in that case).
    """
    # For local streaming, the textgen websockets are hosted without ssl - http://
    HOST = 'localhost:' + str(api_port)
    URI = f'http://{HOST}/v1/completions'

    # Use identity comparison for None (PEP 8), not ==.
    if context_size is None:
        context_max = 4096
    else:
        context_max = context_size

    # Budget for new tokens is whatever the context window has left after
    # the (approximate) prompt length. Clamp at 0 so an over-long prompt
    # never produces a negative max_tokens value in the request.
    max_new_tokens = max(context_max - token_length(prompt), 0)

    request = {
        'prompt': prompt,
        'max_tokens': max_new_tokens,
        'auto_max_new_tokens': False,
        'max_tokens_second': 0,
        # Generation params. If 'preset' is set to different than 'None', the values
        # in presets/preset-name.yaml are used instead of the individual numbers.
        'preset': preset,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': repetition_penalty,
        'repetition_penalty_range': 0,
        'top_k': 49,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar_string': '',
        'guidance_scale': guidance_scale,
        'negative_prompt': negative_prompt,
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': context_max,
        'ban_eos_token': False,
        'custom_token_bans': '',
        'skip_special_tokens': True,
        'stop': ["</s>"]
    }

    response = requests.post(URI, json=request)

    if response.status_code == 200:
        result = response.json()['choices'][0]['text']
        return result
    else:
        # Surface the failure and return None explicitly (the original
        # fell off the end, returning None implicitly).
        print(response.status_code)
        return None