bmconrad committed on
Commit
d5fba32
·
1 Parent(s): 297a762

initial updates

Browse files
Files changed (4) hide show
  1. app.py +76 -3
  2. esv_embeddings.pt +3 -0
  3. requirements.txt +91 -1
  4. t_esv.csv +0 -0
app.py CHANGED
@@ -1,11 +1,83 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
9
 
10
  def respond(
11
  message,
@@ -23,10 +95,11 @@ def respond(
23
  if val[1]:
24
  messages.append({"role": "assistant", "content": val[1]})
25
 
 
26
  messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
-
30
  for message in client.chat_completion(
31
  messages,
32
  max_tokens=max_tokens,
@@ -45,7 +118,7 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
  gr.Slider(
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import torch
5
+ import pandas as pd
6
+
7
+ def load_bible():
8
+ # Replace: 1=Genesis, 2=Exodus, ... 66=Revelation
9
+ books = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy',
10
+ 'Joshua', 'Judges', 'Ruth', '1 Samuel', '2 Samuel', '1 Kings',
11
+ '2 Kings', '1 Chronicles', '2 Chronicles', 'Ezra', 'Nehemiah',
12
+ 'Esther', 'Job', 'Psalms', 'Proverbs', 'Ecclesiastes',
13
+ 'Song of Solomon', 'Isaiah', 'Jeremiah', 'Lamentations', 'Ezekiel',
14
+ 'Daniel', 'Hosea', 'Joel', 'Amos', 'Obadiah', 'Jonah', 'Micah',
15
+ 'Nahum', 'Habakkuk', 'Zephaniah', 'Haggai', 'Zechariah', 'Malachi',
16
+ 'Matthew', 'Mark', 'Luke', 'John', 'Acts', 'Romans',
17
+ '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians',
18
+ 'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians',
19
+ '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James',
20
+ '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude',
21
+ 'Revelation']
22
+ lookup = {}
23
+ for i, j in enumerate(books):
24
+ lookup[i+1]=j
25
+ lookup
26
+
27
+ esv = pd.read_csv('t_esv.csv').\
28
+ rename(columns = {"b":"book"}).drop(["id"], axis=1)
29
+ esv.book = esv.book.replace(lookup)
30
+ return esv
31
+
32
def load_embeddings(fn):
    """Load the pre-computed verse embeddings tensor from file *fn*.

    ``map_location="cpu"`` ensures a tensor that was saved on a GPU
    machine still loads on a CPU-only host (the original call had no
    map_location and would fail in that case).

    Args:
        fn: path to a ``torch.save``-ed tensor (e.g. 'esv_embeddings.pt').

    Returns:
        torch.Tensor of shape (num_verses, embedding_dim) — presumably;
        shape is determined by whatever was saved.
    """
    with open(fn, "rb") as f:
        return torch.load(f, map_location="cpu")
36
+
37
def search(searchText, k=5, show_html=False, return_str=False):
    """Semantic search for the *k* verses most similar to *searchText*.

    Uses the module-level ``model`` (sentence encoder), ``embeddings``
    (pre-computed verse embeddings), and ``bible`` (verse DataFrame).

    Args:
        searchText: free-text query.
        k: number of top verses to return.
        show_html: notebook-only — render results with IPython display.
        return_str: if True, return the results as one newline-joined
            string of "(Book c:v) text" lines instead of a DataFrame.

    Returns:
        str when ``return_str`` is True, otherwise the matching rows of
        ``bible`` as a DataFrame.
    """
    emb = model.encode(searchText, convert_to_tensor=True)
    cos_scores = util.cos_sim(emb, embeddings)[0]
    top_results = torch.topk(cos_scores, k=k)

    x = bible.iloc[top_results.indices.cpu().detach().numpy(), :]

    if show_html:
        # BUGFIX: `display`/`HTML` were undefined names outside a notebook
        # and raised NameError; import lazily so IPython is only required
        # when this notebook-only path is actually used.
        from IPython.display import display, HTML
        for index, row in x.iterrows():
            display(HTML(f'<p style="font-size: 22px;"><strong>{row.book} {row.c}:{row.v}</strong> <em>{row.t}</em></p><br>'))

    if return_str:
        s = []
        for index, row in x.iterrows():
            tmp = f"({row.book} {row.c}:{row.v}) {row.t}"
            s.append(tmp)

        return "\n".join(s)

    else:
        return x
58
+
59
+
60
def create_prompt(q):
    """Build a RAG-style prompt for the chat model.

    Retrieves the 10 verses most similar to *q* (via the sibling
    ``search`` helper) and prepends them as context above the question.

    Args:
        q: the user's question.

    Returns:
        str: "Context:\n<verses>\nQuestion:\n<q>\n" prompt text.
    """
    ctx = search(q, k=10, return_str = True)
    s = f"""Context:
{ctx}
Question:
{q}
"""

    return s
69
+
70
# Constants
# Module-level singletons shared by search(): the sentence encoder, the
# pre-computed verse embeddings (esv_embeddings.pt, tracked via git-lfs),
# and the ESV verse DataFrame.  Loaded once at import time.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
embeddings = load_embeddings("esv_embeddings.pt")
bible = load_bible()
74
 
75
  """
76
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
77
  """
 
78
 
79
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
80
+ # client = InferenceClient("microsoft/Phi-3-mini-4k-instruct")
81
 
82
  def respond(
83
  message,
 
95
  if val[1]:
96
  messages.append({"role": "assistant", "content": val[1]})
97
 
98
+ message = create_prompt(message)
99
  messages.append({"role": "user", "content": message})
100
 
101
  response = ""
102
+
103
  for message in client.chat_completion(
104
  messages,
105
  max_tokens=max_tokens,
 
118
  demo = gr.ChatInterface(
119
  respond,
120
  additional_inputs=[
121
+ gr.Textbox(value="You are a Christian Pastor. Provide spiritual wisdom, based on biblical truth found in the context. Quote from the context when appropriate.", label="System message"),
122
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
123
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
124
  gr.Slider(
esv_embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97bdae9e2437f18a0078258bee489478b3dfb73f7164365af696055746c79c0d
3
+ size 47772316
requirements.txt CHANGED
@@ -1 +1,91 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.3.0
3
+ annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ attrs==23.2.0
6
+ certifi==2024.7.4
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ colorama==0.4.6
10
+ contourpy==1.2.1
11
+ cycler==0.12.1
12
+ dnspython==2.6.1
13
+ email_validator==2.2.0
14
+ fastapi==0.111.0
15
+ fastapi-cli==0.0.4
16
+ ffmpy==0.3.2
17
+ filelock==3.15.4
18
+ fonttools==4.53.1
19
+ fsspec==2024.6.1
20
+ gradio==4.37.2
21
+ gradio_client==1.0.2
22
+ h11==0.14.0
23
+ httpcore==1.0.5
24
+ httptools==0.6.1
25
+ httpx==0.27.0
26
+ huggingface-hub==0.23.4
27
+ idna==3.7
28
+ importlib_resources==6.4.0
29
+ intel-openmp==2021.4.0
30
+ Jinja2==3.1.4
31
+ joblib==1.4.2
32
+ jsonschema==4.23.0
33
+ jsonschema-specifications==2023.12.1
34
+ kiwisolver==1.4.5
35
+ markdown-it-py==3.0.0
36
+ MarkupSafe==2.1.5
37
+ matplotlib==3.9.1
38
+ mdurl==0.1.2
39
+ minijinja==2.0.1
40
+ mkl==2021.4.0
41
+ mpmath==1.3.0
42
+ networkx==3.3
43
+ numpy==1.26.4
44
+ orjson==3.10.6
45
+ packaging==24.1
46
+ pandas==2.2.2
47
+ pillow==10.4.0
48
+ pydantic==2.8.2
49
+ pydantic_core==2.20.1
50
+ pydub==0.25.1
51
+ Pygments==2.18.0
52
+ pyparsing==3.1.2
53
+ python-dateutil==2.9.0.post0
54
+ python-dotenv==1.0.1
55
+ python-multipart==0.0.9
56
+ pytz==2024.1
57
+ PyYAML==6.0.1
58
+ referencing==0.35.1
59
+ regex==2024.5.15
60
+ requests==2.32.3
61
+ rich==13.7.1
62
+ rpds-py==0.19.0
63
+ ruff==0.5.1
64
+ safetensors==0.4.3
65
+ scikit-learn==1.5.1
66
+ scipy==1.14.0
67
+ semantic-version==2.10.0
68
+ sentence-transformers==3.0.1
69
+ setuptools==69.5.1
70
+ shellingham==1.5.4
71
+ six==1.16.0
72
+ sniffio==1.3.1
73
+ starlette==0.37.2
74
+ sympy==1.13.0
75
+ tbb==2021.13.0
76
+ threadpoolctl==3.5.0
77
+ tokenizers==0.19.1
78
+ tomlkit==0.12.0
79
+ toolz==0.12.1
80
+ torch==2.3.1
81
+ tqdm==4.66.4
82
+ transformers==4.42.3
83
+ typer==0.12.3
84
+ typing_extensions==4.12.2
85
+ tzdata==2024.1
86
+ ujson==5.10.0
87
+ urllib3==2.2.2
88
+ uvicorn==0.30.1
89
+ watchfiles==0.22.0
90
+ websockets==11.0.3
91
+ wheel==0.43.0
t_esv.csv ADDED
The diff for this file is too large to render. See raw diff