"""Streamlit app: extract SKILL and KNOWLEDGE lists from free text.

Prompts a Mistral-7B-Instruct model (via the transformers text-generation
pipeline) to emit a JSON object, then best-effort parses the model's reply.
"""

import ast
import json
import re

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"

# NOTE: the literal JSON braces in the example must be doubled ({{ }}) so
# str.format() does not treat them as replacement fields.  The original
# single-brace template raised KeyError('"SKILL"') on every extract() call;
# only {text} is a real placeholder.
PROMPT = """Extract skills and knowledge from the text. Return JSON: {{"SKILL":[...], "KNOWLEDGE":[...]}}. Text: {text} JSON:"""


@st.cache_resource
def _load_pipeline():
    """Build the text-generation pipeline once per server process.

    st.cache_resource prevents Streamlit from reloading the multi-GB model
    on every widget interaction (each rerun re-executes the module).
    """
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, torch_dtype="auto", device_map="auto"
    )
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        max_new_tokens=256,
        do_sample=False,
        return_full_text=False,
    )


def _parse_json_blob(raw: str) -> dict:
    """Best-effort parse of the first ``{...}`` span in *raw*.

    Tries strict JSON first, then a Python-literal fallback (instruct models
    often emit single-quoted pseudo-JSON).  Returns ``{}`` when no parseable
    dict is found.
    """
    m = re.search(r"\{[\s\S]*\}", raw)
    if not m:
        return {}
    blob = m.group(0)
    for parser in (json.loads, ast.literal_eval):
        try:
            data = parser(blob)
        except Exception:
            # Malformed model output for this parser — try the next one.
            continue
        return data if isinstance(data, dict) else {}
    return {}


def extract(text: str) -> dict:
    """Run the LLM on *text*.

    Returns a dict with exactly the keys ``"SKILL"`` and ``"KNOWLEDGE"``,
    each mapping to a list (empty when the model produced nothing usable).
    """
    gen = _load_pipeline()
    out = gen(PROMPT.format(text=text))
    # The output key varies across transformers versions / pipeline options;
    # fall back to the raw repr so the regex search still has something.
    raw = out[0].get("generated_text") or out[0].get("text") or str(out[0])
    data = _parse_json_blob(raw)
    return {
        "SKILL": data.get("SKILL", []),
        "KNOWLEDGE": data.get("KNOWLEDGE", []),
    }


st.title("Skill/Knowledge Extractor")
text = st.text_area("Paste text")
if st.button("Extract") and text.strip():
    st.json(extract(text))