ThesisTestRun / streamlit_app.py
NotRev's picture
Rename app.py to streamlit_app.py
0483c24 verified
import json, re, ast, streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
model_id = "mistralai/Mistral-7B-Instruct-v0.3"


@st.cache_resource
def _load_generation_stack(name: str):
    """Load the tokenizer, model and text-generation pipeline exactly once.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the checkpoint would be reloaded on each
    rerun. ``st.cache_resource`` keeps the loaded objects alive across reruns
    and sessions.

    Args:
        name: Hugging Face model identifier to load.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    lm = AutoModelForCausalLM.from_pretrained(
        name, torch_dtype="auto", device_map="auto"
    )
    generator = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=False,  # sampling disabled
        return_full_text=False,  # ask the pipeline not to return the full text
    )
    return tokenizer, lm, generator


# Same module-level names as before, now backed by the cached loader.
tok, model, gen = _load_generation_stack(model_id)
# Instruction template sent to the model. ``{text}`` is the only placeholder;
# the braces in the JSON example are literal characters, which ``str.format``
# would try to interpret as replacement fields.
prompt = (
    "Extract skills and knowledge from the text.\n"
    'Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.\n'
    "Text: {text}\n"
    "JSON:"
)
def _parse_first_dict(raw: str) -> dict:
    """Return a dict parsed out of the model's raw output, or ``{}``."""
    m = re.search(r"\{[\s\S]*\}", raw)
    if not m:
        return {}
    blob = m.group(0)

    # Primary path: the widest {...} span, as strict JSON first and then as
    # a Python literal (tolerates single-quoted keys/strings).
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(blob)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed

    # Fallback: the widest span can include text after the object (anything
    # up to a later "}"), which makes it unparsable as a whole. Decode the
    # first well-formed JSON object instead, ignoring whatever follows it.
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", blob):
        try:
            parsed, _ = decoder.raw_decode(blob, brace.start())
        except (ValueError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


def _as_list(value) -> list:
    """Coerce one extracted field to a list (a bare string becomes one item)."""
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        return list(value)
    if isinstance(value, str) and value.strip():
        return [value]
    return []


def extract(text: str) -> dict:
    """Ask the model for the skills and knowledge mentioned in *text*.

    Args:
        text: Free-form input text to analyse.

    Returns:
        ``{"SKILL": [...], "KNOWLEDGE": [...]}``. Both lists are empty when
        the model output contains no parsable object or lacks the key.
    """
    # The template contains literal JSON braces, so ``prompt.format(...)``
    # raises KeyError('"SKILL"'). Substitute the single placeholder directly.
    out = gen(prompt.replace("{text}", text))
    first = out[0]
    raw = first.get("generated_text") or first.get("text") or str(first)
    data = _parse_first_dict(raw)
    return {
        "SKILL": _as_list(data.get("SKILL")),
        "KNOWLEDGE": _as_list(data.get("KNOWLEDGE")),
    }
# --- Page layout: title, input box, and an on-demand extraction button. ---
st.title("Skill/Knowledge Extractor")
text = st.text_area("Paste text")

# The button is always rendered; the text is only inspected once it has been
# clicked, and blank/whitespace-only input produces no output.
clicked = st.button("Extract")
if clicked and text.strip():
    result = extract(text)
    st.json(result)