Spaces:
Runtime error
Runtime error
Andreas Sünder
committed on
Commit
·
fce98ea
0
Parent(s):
Add files from previous repo
Browse files- .gitattributes +35 -0
- .gitignore +1 -0
- README.md +14 -0
- app.py +91 -0
- datasets/lda_poe_topics.csv +40 -0
- model.py +25 -0
- requirements.txt +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Topic Labelling Playground
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.27.2
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: other
|
| 11 |
+
models:
|
| 12 |
+
- textminr/llama-2-7b-4bit-gptq
|
| 13 |
+
- textminr/llama-2-7b-chat-4bit-gptq
|
| 14 |
+
---
|
app.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import list_models
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from model import ReplicateModel
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
DATASETS_PATH = 'datasets'
|
| 9 |
+
|
| 10 |
+
# Registry of selectable models, keyed by the short name offered in the model
# selectbox. Each value wraps the model reference string that ReplicateModel
# later hands to its backend.
models = {
    'mistral': ReplicateModel('mistralai/mistral-7b-instruct-v0.1:83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70'),
}

# Prompt templates keyed by name. The literal "[KEYWORDS]" marker is replaced
# with the comma-joined keywords of the selected row before generation.
# The template text is sent verbatim, so its line breaks are significant.
prompts = {
    'simple_prompt':
'''
I have topic that is described by the following keywords: [KEYWORDS]

Based on the information above, extract a short topic label in the following format:
topic: <topic label>
'''
    # 'custom_prompt': ''
}

# Topic-set CSV files keyed by dataset name, located under DATASETS_PATH.
topicsets = {
    'lda_poe_topics': os.path.join(DATASETS_PATH, 'lda_poe_topics.csv'),
}
|
| 28 |
+
|
| 29 |
+
@st.cache_data(show_spinner=False)
def get_available_models():
    """Return the names of the models registered in ``models``.

    Returns:
        A list of model names, in registry order.

    A plain list is returned instead of a ``dict_keys`` view because
    ``st.cache_data`` pickles the return value and dict views cannot be
    pickled.
    """
    return list(models)
|
| 33 |
+
|
| 34 |
+
@st.cache_resource(show_spinner='Loading model...')
def load_model(model_name: str):
    """Look up ``model_name`` in ``models`` and return the result of its ``load()``.

    Args:
        model_name: Key of the model in the ``models`` registry.

    Raises:
        KeyError: If ``model_name`` is not a registered model.
    """
    selected = models[model_name]
    return selected.load()
|
| 39 |
+
|
| 40 |
+
# Set the browser tab title/icon and use the wide page layout.
st.set_page_config(page_title='TL playground', page_icon='🚀', layout='wide')
st.title('🚀 Topic Labelling playground')

# Inject a CSS rule that caps the main block container at this percentage of
# the page width on viewports at least 1500px wide. The doubled braces are
# f-string escapes that emit literal CSS braces.
percentage_width_main = 70
st.markdown(
    f'''<style>
@media only screen and (min-width: 1500px) {{
.appview-container .main .block-container{{
max-width: {percentage_width_main}%;
}}
}}
</style>
''',
    # Required so the <style> element is emitted as HTML rather than escaped.
    unsafe_allow_html=True,
)
|
| 55 |
+
|
| 56 |
+
# Two-column layout: model/dataset/row selection on the left, prompt
# selection and generation output on the right.
col1, col2 = st.columns(2, gap='medium')

sel_model_name = col1.selectbox('Select a model', models, index=None, placeholder='Select a model')
if sel_model_name:
    model = load_model(sel_model_name)

sel_dataset_name = col1.selectbox('Select a dataset', topicsets.keys(), index=None)
if sel_dataset_name:
    # The topic files have no header row; the first column holds the topic
    # name and the remaining columns hold its keywords.
    sel_dataset = pd.read_csv(topicsets[sel_dataset_name], header=None)
    col1.dataframe(sel_dataset)

    sel_row_index = col1.selectbox('Select a row', sel_dataset.index)

sel_prompt = col2.selectbox('Select a prompt', prompts.keys())
if sel_prompt != 'custom_prompt':
    col2.code(prompts[sel_prompt], language='text')
    sel_prompt_text = prompts[sel_prompt]
else:
    # Render the editor in col2 so it sits with the other prompt widgets and
    # directly above its caption, instead of outside the column layout.
    sel_prompt_text = col2.text_area('Custom prompt', height=200)
    col2.caption('Make sure to use "[KEYWORDS]" to indicate where the keywords should be inserted.')

# Generation needs both a model and a dataset row, so the button stays
# disabled until both have been chosen; this also guarantees that `model`,
# `sel_dataset` and `sel_row_index` are defined below.
btn_generate = col2.button('Generate', disabled=(sel_model_name is None or sel_dataset_name is None))
if btn_generate:
    # Drop the leading topic-name column and keep only the keywords.
    keywords = ','.join(sel_dataset.iloc[sel_row_index].tolist()[1:])

    placeholder = col2.empty()
    with placeholder, st.spinner('Generating...'):
        prompt = sel_prompt_text.replace('[KEYWORDS]', keywords)
        result = model.generate(prompt)

    message = col2.chat_message("ai")
    message.write(result)
    message.caption('Keywords: ' + keywords)
|
| 90 |
+
|
| 91 |
+
|
datasets/lda_poe_topics.csv
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Topic 1,howev,even,found,period,altogeth,imposs,precis,perhap,short,prove
|
| 2 |
+
Topic 2,water,wind,black,vessel,larg,sea,river,white,heavi,float
|
| 3 |
+
Topic 3,beauti,shall,name,whose,new,tree,angel,find,flower,fair
|
| 4 |
+
Topic 4,will,say,mean,rememb,speak,know,none,jupit,dare,limb
|
| 5 |
+
Topic 5,call,may,doubt,matter,question,exist,mind,beyond,term,now
|
| 6 |
+
Topic 6,one,everi,thing,moment,just,instant,anoth,almost,inde,frame
|
| 7 |
+
Topic 7,point,must,certain,fact,case,view,captain,given,consid,import
|
| 8 |
+
Topic 8,immedi,found,near,saw,state,now,present,sever,discov,approach
|
| 9 |
+
Topic 9,made,make,way,attempt,get,forc,difficulti,account,escap,effort
|
| 10 |
+
Topic 10,two,three,feet,year,hour,half,four,minut,thousand,hundr
|
| 11 |
+
Topic 11,whole,form,earth,small,around,figur,stood,portion,surfac,vast
|
| 12 |
+
Topic 12,upon,eye,look,fell,face,floor,fall,spot,depend,tabl
|
| 13 |
+
Topic 13,one,hope,power,leav,follow,entir,scarc,consider,pass,mad
|
| 14 |
+
Topic 14,man,old,die,away,live,ladi,young,busi,gentleman,pass
|
| 15 |
+
Topic 15,time,first,long,second,keep,thought,circumst,instanc,letter,care
|
| 16 |
+
Topic 16,much,evid,gave,although,truth,express,mean,seem,sens,felt
|
| 17 |
+
Topic 17,like,light,ever,life,dream,dark,moon,wild,deep,appear
|
| 18 |
+
Topic 18,effect,appear,step,upon,except,found,caus,event,discov,produc
|
| 19 |
+
Topic 19,day,night,last,long,continu,cours,late,arriv,bring,raven
|
| 20 |
+
Topic 20,lie,side,full,went,length,peter,augustus,lay,deck,board
|
| 21 |
+
Topic 21,far,air,seem,heaven,high,sun,breath,grew,atmospher,rise
|
| 22 |
+
Topic 22,now,becam,soon,distinct,object,absolut,necessari,appar,render,felt
|
| 23 |
+
Topic 23,poem,poe,origin,work,first,read,poet,note,paper,line
|
| 24 |
+
Topic 24,great,degre,island,sea,reach,set,land,strong,measur,sight
|
| 25 |
+
Topic 25,natur,feel,excit,interest,true,differ,intens,result,principl,peculiar
|
| 26 |
+
Topic 26,less,part,appear,thus,regard,posit,person,number,greater,mention
|
| 27 |
+
Topic 27,hand,left,place,take,took,right,arm,hold,put,end
|
| 28 |
+
Topic 28,without,bodi,reason,believ,suppos,corps,becom,madam,murder,least
|
| 29 |
+
Topic 29,still,idea,think,fanci,human,dead,possess,impress,smile,wonder
|
| 30 |
+
Topic 30,death,voic,sound,bell,heard,ear,low,without,fire,proceed
|
| 31 |
+
Topic 31,word,let,thus,attent,utter,spoke,alon,gone,repeat,scene
|
| 32 |
+
Topic 32,head,turn,came,upon,back,extrem,sudden,come,near,round
|
| 33 |
+
Topic 33,mani,manner,charact,particular,subject,singular,success,weather,alway,articl
|
| 34 |
+
Topic 34,littl,mere,remain,purpos,better,longer,suffer,use,wish,home
|
| 35 |
+
Topic 35,now,thou,friend,sure,inde,say,repli,art,fear,sir
|
| 36 |
+
Topic 36,well,good,among,observ,world,general,known,men,may,knew
|
| 37 |
+
Topic 37,said,might,possibl,thought,taken,king,still,bird,yes,dupin
|
| 38 |
+
Topic 38,open,door,within,close,room,chamber,wall,enter,main,box
|
| 39 |
+
Topic 39,never,can,noth,see,yet,seen,even,done,know,eye
|
| 40 |
+
Topic 40,love,thi,heart,soul,spirit,thee,god,shadow,within,passion
|
model.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
import replicate
|
| 3 |
+
|
| 4 |
+
class GenericModel(ABC):
    """Abstract base class for text-generation backends identified by a name.

    Subclasses must implement ``load`` and ``generate``. Inheriting from
    ``ABC`` is what makes the ``@abstractmethod`` markers effective: without
    it, incomplete subclasses (and this base class itself) could be
    instantiated and would silently return ``None`` from these methods.
    """

    def __init__(self, name: str):
        """Store the backend-specific model identifier in ``self.name``."""
        self.name = name

    @abstractmethod
    def load(self):
        """Prepare the model and return the object used for generation."""

    @abstractmethod
    def generate(self, prompt: str):
        """Return the text generated for ``prompt``."""
|
| 15 |
+
|
| 16 |
+
class ReplicateModel(GenericModel):
    """Model backend that runs a model through ``replicate.run``."""

    def __init__(self, name: str):
        """Store ``name``, the model reference later passed to ``replicate.run``."""
        super().__init__(name)

    def load(self):
        """Return this instance; no separate loading step is performed here."""
        return self

    def generate(self, prompt: str):
        """Run the model on ``prompt`` and return its output joined into one string."""
        chunks = replicate.run(self.name, input={'prompt': prompt})
        return ''.join(chunks)
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers @ git+https://github.com/huggingface/transformers
|
| 2 |
+
replicate
|
| 3 |
+
auto_gptq
|