NotRev committed on
Commit
4a92139
·
verified ·
1 Parent(s): 6ef4aa5

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +33 -0
  2. predictions_train.jsonl +0 -0
  3. requirements.txt +2 -2
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, re, ast, streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+
4
model_id = "mistralai/Mistral-7B-Instruct-v0.3"


@st.cache_resource
def _load_generator(name: str):
    """Load the tokenizer, model and text-generation pipeline for *name*.

    Streamlit re-executes this script from the top on every widget
    interaction. ``st.cache_resource`` keeps the returned objects alive
    across those reruns, so the checkpoint is loaded once per process
    instead of once per click.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    lm = AutoModelForCausalLM.from_pretrained(
        name, torch_dtype="auto", device_map="auto"
    )
    generator = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=False,          # greedy decoding
        return_full_text=False,   # return only the completion, not the prompt
    )
    return tokenizer, lm, generator


# Module-level names are kept so existing references to them keep working.
tok, model, gen = _load_generator(model_id)

# Prompt template; "{text}" is the placeholder for the user's input.
# NOTE: the literal JSON braces in the second line are NOT escaped, so calling
# str.format() on this template raises KeyError. Substitute the placeholder
# with str.replace("{text}", ...) instead.
prompt = """Extract skills and knowledge from the text.
Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.
Text: {text}
JSON:"""
14
+
15
# Exceptions json.loads / ast.literal_eval raise on malformed or hostile input.
# json.JSONDecodeError is a subclass of ValueError.
_PARSE_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)


def _parse_object(raw: str) -> dict:
    """Return the dict embedded in *raw*, or ``{}`` if none can be parsed."""
    found = re.search(r"\{[\s\S]*\}", raw)
    if found is None:
        return {}
    blob = found.group(0)

    # First try the widest "{...}" span: strict JSON, then a Python literal
    # (covers single-quoted keys, which are not valid JSON).
    for parser in (json.loads, ast.literal_eval):
        try:
            candidate = parser(blob)
        except _PARSE_ERRORS:
            continue
        if isinstance(candidate, dict):
            return candidate

    # The widest span can include trailing text that happens to contain "}".
    # raw_decode parses one JSON value from the start and ignores what follows.
    try:
        candidate, _ = json.JSONDecoder().raw_decode(blob)
    except _PARSE_ERRORS:
        return {}
    return candidate if isinstance(candidate, dict) else {}


def _as_list(value) -> list:
    """Coerce a parsed field to a list so callers always get the same shape."""
    if isinstance(value, list):
        return value
    if isinstance(value, str) and value.strip():
        return [value]
    return []


def extract(text: str) -> dict:
    """Run the generator on *text* and return the extracted skills/knowledge.

    Args:
        text: Free-form input text to analyse.

    Returns:
        ``{"SKILL": [...], "KNOWLEDGE": [...]}``. A list is empty when the
        model output contains no parseable object or lacks that key.
    """
    # The template contains literal JSON braces, so str.format() would raise
    # KeyError on it. Plain replacement also keeps braces in the user's text
    # from being treated as format fields.
    out = gen(prompt.replace("{text}", text))
    first = out[0]
    raw = first.get("generated_text") or first.get("text") or str(first)

    data = _parse_object(raw)
    return {
        "SKILL": _as_list(data.get("SKILL")),
        "KNOWLEDGE": _as_list(data.get("KNOWLEDGE")),
    }
29
+
30
# --- Streamlit page -------------------------------------------------------
st.title("Skill/Knowledge Extractor")
text = st.text_area("Paste text")

# The button is evaluated first; the text is only inspected once it has been
# clicked, and extraction is skipped for blank / whitespace-only input.
clicked = st.button("Extract")
if clicked and text.strip():
    result = extract(text)
    st.json(result)
predictions_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- altair
2
- pandas
3
  streamlit
 
1
+ transformers
2
+ accelerate
3
  streamlit