Spaces:
No application file
No application file
File size: 1,253 Bytes
4a92139 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import json, re, ast, streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
model_id = "mistralai/Mistral-7B-Instruct-v0.3"


@st.cache_resource
def _load_generator(name: str):
    """Load the tokenizer, causal LM and text-generation pipeline for *name*.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects as a shared resource keeps the
    model from being re-instantiated on each rerun.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    lm = AutoModelForCausalLM.from_pretrained(
        name, torch_dtype="auto", device_map="auto"
    )
    generator = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding
        return_full_text=False,  # return only the completion, not the prompt
    )
    return tokenizer, lm, generator


# Keep the original module-level names so the rest of the script is unchanged.
tok, model, gen = _load_generator(model_id)
# Instruction template sent to the model. It contains literal JSON braces in
# addition to the {text} placeholder, so the placeholder is the only part
# meant to be substituted.
prompt = (
    "Extract skills and knowledge from the text.\n"
    'Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.\n'
    "Text: {text}\n"
    "JSON:"
)
# Failure types raised by json.loads / ast.literal_eval on malformed input.
_PARSE_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)


def _parse_first_dict(raw: str) -> dict:
    """Return a dict parsed out of the model output *raw*, or ``{}``."""
    span = re.search(r"\{[\s\S]*\}", raw)
    if span is None:
        return {}
    blob = span.group(0)

    # First try the widest {...} span, as strict JSON and then as a Python
    # literal (covers single-quoted keys/strings).
    for parser in (json.loads, ast.literal_eval):
        try:
            value = parser(blob)
        except _PARSE_ERRORS:
            continue
        if isinstance(value, dict):
            return value

    # The widest span can include trailing text that happens to contain "}".
    # raw_decode stops at the end of the first complete JSON value, so try
    # each "{" as a candidate start and take the first dict that parses.
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", blob):
        try:
            value, _ = decoder.raw_decode(blob, brace.start())
        except _PARSE_ERRORS:
            continue
        if isinstance(value, dict):
            return value
    return {}


def _as_list(value) -> list:
    """Coerce a parsed field to a list; anything unusable becomes ``[]``."""
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str) and value.strip():
        return [value]
    return []


def extract(text: str):
    """Run the module-level ``gen`` pipeline on *text* and parse its answer.

    Args:
        text: The passage to extract skills and knowledge from.

    Returns:
        A dict ``{"SKILL": [...], "KNOWLEDGE": [...]}``. Both values are
        always lists; they are empty when the model output contains no
        parseable dict or the key is missing.
    """
    # The template holds literal JSON braces, so str.format() would raise
    # KeyError on '"SKILL"'. Substitute the placeholder explicitly instead.
    out = gen(prompt.replace("{text}", str(text)))
    first = out[0]
    raw = first.get("generated_text") or first.get("text") or str(first)
    data = _parse_first_dict(raw)
    return {
        "SKILL": _as_list(data.get("SKILL")),
        "KNOWLEDGE": _as_list(data.get("KNOWLEDGE")),
    }
# Page layout: a title, a free-text input, and a button that triggers
# extraction and renders the resulting dict as JSON.
st.title("Skill/Knowledge Extractor")
text = st.text_area("Paste text")
if st.button("Extract"):
    if text.strip():
        st.json(extract(text))
    else:
        # Previously a click with empty input did nothing and gave no feedback.
        st.warning("Please paste some text first.")