NotRev committed on
Commit
3dc09f6
·
verified ·
1 Parent(s): 739c533

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +40 -32
src/streamlit_app.py CHANGED
@@ -1,33 +1,41 @@
1
- import json, re, ast, streamlit as st
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
-
4
- model_id = "mistralai/Mistral-7B-Instruct-v0.3"
5
- tok = AutoTokenizer.from_pretrained(model_id)
6
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
7
- gen = pipeline("text-generation", model=model, tokenizer=tok,
8
- max_new_tokens=256, do_sample=False, return_full_text=False)
9
-
10
- prompt = """Extract skills and knowledge from the text.
11
- Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.
12
- Text: {text}
13
- JSON:"""
14
-
15
- def extract(text: str):
16
- out = gen(prompt.format(text=text))
17
- raw = out[0].get("generated_text") or out[0].get("text") or str(out[0])
18
- m = re.search(r"\{[\s\S]*\}", raw)
19
- data = {}
20
- if m:
21
- blob = m.group(0)
22
- for parser in (json.loads, ast.literal_eval):
23
- try:
24
- data = parser(blob); break
25
- except Exception: pass
26
- if not isinstance(data, dict):
27
- data = {}
28
- return {"SKILL": data.get("SKILL", []), "KNOWLEDGE": data.get("KNOWLEDGE", [])}
29
-
30
- st.title("Skill/Knowledge Extractor")
31
- text = st.text_area("Paste text")
32
- if st.button("Extract") and text.strip():
 
 
 
 
 
 
 
 
33
  st.json(extract(text))
 
1
+ import json, re, ast, streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+
4
+ model_id = "mistralai/Mistral-7B-Instruct-v0.3"
5
+ tok = AutoTokenizer.from_pretrained(model_id)
6
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
7
+ gen = pipeline("text-generation", model=model, tokenizer=tok,
8
+ max_new_tokens=256, do_sample=False, return_full_text=False)
9
+
10
+ prompt = """Extract skills and knowledge from the text.
11
+ Return JSON: {"SKILL":[...], "KNOWLEDGE":[...]}.
12
+ Text: {text}
13
+ JSON:"""
14
+
15
+ def extract(text: str):
16
+ out = gen(prompt.format(text=text))
17
+ raw = out[0].get("generated_text") or out[0].get("text") or str(out[0])
18
+ m = re.search(r"\{[\s\S]*\}", raw)
19
+ data = {}
20
+ if m:
21
+ blob = m.group(0)
22
+ for parser in (json.loads, ast.literal_eval):
23
+ try:
24
+ data = parser(blob); break
25
+ except Exception: pass
26
+ if not isinstance(data, dict):
27
+ return {
28
+ "SKILL": ["(Error: Invalid/Corrupted Model Output)"],
29
+ "KNOWLEDGE": [],
30
+ "DEBUG_RAW_OUTPUT": raw
31
+ }
32
+
33
+ return {
34
+ "SKILL": data.get("SKILL", []),
35
+ "KNOWLEDGE": data.get("KNOWLEDGE", [])
36
+ }
37
+
38
+ st.title("Skill/Knowledge Extractor")
39
+ text = st.text_area("Paste text")
40
+ if st.button("Extract") and text.strip():
41
  st.json(extract(text))