NotRev committed on
Commit
421f83d
·
verified ·
1 Parent(s): f233d50

Upload app.py

Browse files
Files changed (1) hide show
  1. src/app.py +33 -0
src/app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, re, ast, streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+
4
model_id = "mistralai/Mistral-7B-Instruct-v0.3"


@st.cache_resource
def _load_generation_stack(name: str):
    """Load the tokenizer, model and text-generation pipeline for *name*.

    Streamlit re-executes this script on every widget interaction. Without
    caching, the checkpoint would be reloaded each time the user clicks a
    button, so the loaded objects are cached as a shared resource and reused
    across reruns.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    language_model = AutoModelForCausalLM.from_pretrained(
        name, torch_dtype="auto", device_map="auto"
    )
    generator = pipeline(
        "text-generation",
        model=language_model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=False,  # deterministic (greedy) decoding
        return_full_text=False,  # only the continuation, not the prompt
    )
    return tokenizer, language_model, generator


# Keep the original module-level names so the rest of the file is unaffected.
tok, model, gen = _load_generation_stack(model_id)
9
+
10
+ prompt = """Extract skills and knowledge from the text.
11
+ Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.
12
+ Text: {text}
13
+ JSON:"""
14
+
15
# Errors json / ast.literal_eval are documented to raise on malformed input.
_PARSE_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)


def _parse_payload(raw: str) -> dict:
    """Return the first dict found in the model output *raw*, or ``{}``."""
    start = raw.find("{")
    if start == -1:
        return {}

    parsed = None
    try:
        # Decode exactly one JSON value starting at the first "{". Unlike a
        # greedy "{...}" span, this ignores anything the model keeps
        # generating after the object (including further braces).
        parsed, _ = json.JSONDecoder().raw_decode(raw, start)
    except (ValueError, RecursionError):
        # Not strict JSON (e.g. single-quoted keys): fall back to evaluating
        # the widest "{...}" span as a Python literal. literal_eval only
        # accepts literals, so no code from the model output is executed.
        end = raw.rfind("}")
        if end > start:
            try:
                parsed = ast.literal_eval(raw[start:end + 1])
            except _PARSE_ERRORS:
                parsed = None

    return parsed if isinstance(parsed, dict) else {}


def _as_list(value) -> list:
    """Coerce a parsed field to a list so callers always get list values."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        return list(value)
    return [value]


def extract(text: str) -> dict:
    """Ask the model for the skills and knowledge mentioned in *text*.

    Args:
        text: Free-form input text to analyse.

    Returns:
        ``{"SKILL": [...], "KNOWLEDGE": [...]}``. Both lists are empty when
        the model output contains no parseable object or lacks the keys.
    """
    # The template contains literal JSON braces, so str.format would raise
    # KeyError on them; substitute only the {text} placeholder instead.
    out = gen(prompt.replace("{text}", text))
    first = out[0]
    raw = first.get("generated_text") or first.get("text") or str(first)

    data = _parse_payload(raw)
    return {
        "SKILL": _as_list(data.get("SKILL")),
        "KNOWLEDGE": _as_list(data.get("KNOWLEDGE")),
    }
29
+
30
# --- Page layout: title, input box, and a button that triggers extraction ---
st.title("Skill/Knowledge Extractor")
text = st.text_area("Paste text")
clicked = st.button("Extract")

# Run the model only when the button was pressed and the input is non-blank.
if clicked and text.strip():
    result = extract(text)
    st.json(result)