# Sibawayh — allam.py
# Author: mghareeb32 (commit 77a11be, "More questions.")
import os
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames
from ibm_watsonx_ai.foundation_models import ModelInference
import Levenshtein
import random
def q_parameterize(text):
    """Turn a worked example prompt into a fill-in template.

    Drops everything after the last '####' delimiter in *text* and appends
    an empty Input/Output slot; the caller later substitutes the literal
    'XXXX' placeholder with real input.
    """
    head, _sep, _tail = text.rpartition('####')
    # rpartition keeps the text before the LAST '####'; when the marker is
    # absent it yields '' — identical to '####'.join(text.split('####')[:-1]).
    template = """
####
Input: XXXX
Output:"""
    return head + template
def QBANK():
    """Load every question template from the ./qbank directory.

    Each file is 'name ==== prompt-template'; the first character of the
    filename encodes the integer category.  Returns a list of dicts with
    keys 'name', 'category' and 'prompt' (already parameterized).
    """
    questions = []
    for fname in os.listdir("qbank"):
        with open(f"qbank/{fname}", 'r', encoding="utf-8") as fh:
            raw = fh.read()
        pieces = raw.split("====")
        entry = {
            'name': pieces[0].strip(),
            # leading digit of the filename is the category id
            'category': int(fname[0]),
            'prompt': q_parameterize(pieces[1]),
        }
        questions.append(entry)
    return questions
def QBANK_closest(text):
    """Return the index of the question-bank entry closest to *text*.

    Distance is Levenshtein edit distance.  Falls back to index 0 when no
    entry beats the initial bound of len(text) * 10.
    """
    min_i = 0
    min_d = len(text) * 10  # generous upper bound; first real match wins
    # BUG FIX: the original iterated `enumerate(QBANK)` — the function
    # object itself, which raises TypeError — and passed the whole dict to
    # Levenshtein.distance, which requires strings.
    for i, q in enumerate(QBANK()):
        # NOTE(review): comparing against the question *name* — presumably
        # the intended key; confirm against callers.
        d = Levenshtein.distance(q['name'], text)
        if d < min_d:
            min_d = d
            min_i = i
    return min_i
def CORPUSES():
    """Read every file under ./txtbank as one whitespace-flattened string.

    Newlines are replaced with spaces and the result is stripped, so each
    corpus comes back as a single line of text.
    """
    corpora = []
    for fname in os.listdir("txtbank"):
        with open(f"txtbank/{fname}", 'r', encoding="utf-8") as fh:
            raw = fh.read()
        flattened = raw.replace("\n", " ").strip()
        corpora.append(flattened)
    return corpora
def evaluate(c, q, a):
    """Grade answer *a* to question *q* against corpus *c* with the Allam model.

    The grading prompt is read from 'anseval.txt' with CCCC/XXXX/YYYY
    substituted by corpus/question/answer.  Retries indefinitely until the
    model returns a parseable number, then clamps it to [0, 10] and returns
    round(grade) / 10.0, i.e. a score in [0.0, 1.0].
    """
    # Read and fill the prompt ONCE.  The original re-opened the file on
    # every retry and never closed the handle (resource leak); a missing
    # prompt file also made the retry loop spin forever — now it raises.
    with open('anseval.txt', 'r', encoding="utf-8") as fh:
        query = fh.read() \
            .replace("CCCC", c) \
            .replace("XXXX", q) \
            .replace("YYYY", a)
    print(query)
    grade = -1
    while grade < 0:
        try:
            model = ModelInference(
                model_id="sdaia/allam-1-13b-instruct",
                credentials=Credentials(
                    # SECURITY: hardcoded API key checked into source — move
                    # to an environment variable / secret store and rotate
                    # this credential.
                    api_key = 'iYwxSbn8Kxe3UjV2OMS9tSe1dxPlgndTamTwET6-_GLX',
                    url="https://eu-de.ml.cloud.ibm.com",
                ),
                project_id="89e6b2dd-fcc5-4ee2-af0b-8e6d735493ed",
                params={
                    GenTextParamsMetaNames.DECODING_METHOD: "sample",
                    GenTextParamsMetaNames.MIN_NEW_TOKENS: 1,
                    GenTextParamsMetaNames.MAX_NEW_TOKENS: 64,
                    GenTextParamsMetaNames.STOP_SEQUENCES: ["####"],
                    GenTextParamsMetaNames.TEMPERATURE: 1,
                    GenTextParamsMetaNames.TOP_K: 50,
                    GenTextParamsMetaNames.TOP_P: .95,
                    GenTextParamsMetaNames.REPETITION_PENALTY: 1,
                },
            )
            resp = model.generate(query)
            # Keep only the text before any echoed few-shot delimiter.
            resp_text = "".join([r['generated_text'] for r in resp['results']])\
                .split("####")[0]\
                .split("Input:")[0]\
                .split("Output:")[0]\
                .strip()
            grade = max(0, min(10, float(resp_text)))
        except Exception as e:
            # Deliberate best-effort retry (model may return non-numeric
            # text), but log the failure instead of the original silent pass.
            print(e)
    print(grade)
    return round(grade) / 10.0
def gen_questions(q, prompt_param):
    """Generate question/answer pairs from one question-bank template.

    q: dict with 'name', 'category', 'prompt' keys (as produced by QBANK()).
    prompt_param: corpus text substituted for the 'XXXX' placeholder.
    Returns a list of {'Q', 'A', 'category'} dicts; on any exception the
    partial result accumulated so far is returned (errors are printed,
    never raised).
    """
    result = []
    try:
        model = ModelInference(
            model_id="sdaia/allam-1-13b-instruct",
            credentials=Credentials(
                # SECURITY NOTE(review): hardcoded API key checked into
                # source — should live in a secret store and be rotated.
                api_key = 'iYwxSbn8Kxe3UjV2OMS9tSe1dxPlgndTamTwET6-_GLX',
                url="https://eu-de.ml.cloud.ibm.com",
            ),
            project_id="89e6b2dd-fcc5-4ee2-af0b-8e6d735493ed",
            params={
                GenTextParamsMetaNames.DECODING_METHOD: "sample",
                GenTextParamsMetaNames.MIN_NEW_TOKENS: 1,
                GenTextParamsMetaNames.MAX_NEW_TOKENS: 64,
                GenTextParamsMetaNames.STOP_SEQUENCES: ["####"],
                GenTextParamsMetaNames.TEMPERATURE: 1,
                GenTextParamsMetaNames.TOP_K: 50,
                GenTextParamsMetaNames.TOP_P: .95,
                GenTextParamsMetaNames.REPETITION_PENALTY: 1,
            },
        )
        resp = model.generate(q['prompt'].replace("XXXX", prompt_param))
        # Concatenate all generated chunks, then keep only the text before
        # any echoed few-shot delimiter ('####', 'Input:', 'Output:').
        resp_text = "".join([r['generated_text'] for r in resp['results']])\
            .split("####")[0]\
            .split("Input:")[0]\
            .split("Output:")[0]\
            .strip()
        if "..." not in q['name']:
            # Fixed question: the whole model response is the answer.
            result.append({
                'Q': q['name'],
                'A': resp_text,
                'category': q['category'],
            })
        else:
            # Templated question ('...' placeholder in the name): the
            # response is treated as a '- ' bulleted list; each item of the
            # form 'topic: answer' (or Arabic 'topic؟ answer') becomes its
            # own Q/A pair, with the topic spliced into the name.
            for a in resp_text.split("- "):
                ap = None
                if ": " in a:
                    ap = a.split(": ")
                elif "؟ " in a:
                    ap = a.split("؟ ")
                if ap is not None:
                    result.append({
                        'Q': q['name'].replace("...", ap[0].strip()),
                        # Only the first line after the separator is kept.
                        'A': ap[1].split("\n")[0].strip(),
                        'category': q['category'],
                    })
        if len(result) > 3:
            # Down-sample larger batches: shuffle, then keep half of the
            # items, clamped to the range 1..5.
            random.shuffle(result)
            N = max(1, min(5, len(result) // 2))
            result = result[:N]
        return result
    except Exception as e:
        # Broad catch is deliberate: any model or parsing failure is logged
        # and the partial result is returned rather than raised.
        print(e)
        return result
if __name__ == "__main__":
    # Hoist the corpus load out of the loop: the original called CORPUSES()
    # — which re-reads every file under ./txtbank — once per question.
    corpus = CORPUSES()[0]
    for q in QBANK():
        questions = gen_questions(q, corpus)
        print(questions)