Spaces:
No application file
| import json, re, ast, streamlit as st | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# Load the instruction-tuned Mistral checkpoint once at module import.
# device_map="auto" places weights on whatever accelerator is available;
# torch_dtype="auto" keeps the checkpoint's native precision.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)
# Greedy (deterministic) decoding; return only the completion, not the prompt.
gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tok,
    max_new_tokens=256,
    do_sample=False,
    return_full_text=False,
)
| prompt = """Extract skills and knowledge from the text. | |
| Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}. | |
| Text: {text} | |
| JSON:""" | |
def extract(text: str) -> dict:
    """Run the LLM over *text* and parse its reply into skill/knowledge lists.

    Parameters
    ----------
    text : str
        Free-form input text to analyze.

    Returns
    -------
    dict
        {"SKILL": [...], "KNOWLEDGE": [...]} — each value is always a list;
        both are empty when the model reply contains no parseable JSON object.
    """
    # BUG FIX: prompt.format(text=text) raised KeyError because the template's
    # JSON example contains unescaped literal braces ({"SKILL":...} is parsed
    # as a replacement field). Substitute the placeholder directly instead.
    out = gen(prompt.replace("{text}", text))
    first = out[0]
    # Pipeline output key varies by version; fall back to a string dump.
    raw = first.get("generated_text") or first.get("text") or str(first)
    # Grab the outermost {...} span — the model may wrap the JSON in prose.
    match = re.search(r"\{[\s\S]*\}", raw)
    data = {}
    if match:
        blob = match.group(0)
        # json.loads handles strict JSON; ast.literal_eval tolerates
        # Python-style dict output (single quotes) without executing code.
        for parser in (json.loads, ast.literal_eval):
            try:
                data = parser(blob)
                break
            except (ValueError, SyntaxError, TypeError):
                # Best-effort parsing: try the next parser on failure.
                continue
    if not isinstance(data, dict):
        data = {}
    skills = data.get("SKILL", [])
    knowledge = data.get("KNOWLEDGE", [])
    # Coerce scalar values so the UI always receives lists.
    return {
        "SKILL": skills if isinstance(skills, list) else [skills],
        "KNOWLEDGE": knowledge if isinstance(knowledge, list) else [knowledge],
    }
| st.title("Skill/Knowledge Extractor") | |
| text = st.text_area("Paste text") | |
| if st.button("Extract") and text.strip(): | |
| st.json(extract(text)) |