sohchattglc11111 committed on
Commit
52687ba
·
verified ·
1 Parent(s): 7cccf9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -23
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import nltk
2
  import os, json
3
- from dotenv import load_dotenv
 
4
  nltk.download("punkt_tab")
5
 
6
  RETRIEVER = None
@@ -8,10 +9,29 @@ RETRIEVER = None
8
  import gradio as gr
9
  import nltk
10
  from typing import List
 
 
 
11
 
 
 
 
12
 
 
 
 
 
13
 
14
- from dataclasses import dataclass
 
 
 
 
 
 
 
 
 
15
 
16
  @dataclass
17
  class Utterance:
@@ -52,7 +72,7 @@ def parse_webvtt(path: str) -> list[Utterance]:
52
 
53
 
54
 
55
- from nltk.tokenize import sent_tokenize
56
 
57
  def build_subchunks(
58
  utterances,
@@ -93,7 +113,7 @@ def build_subchunks(
93
  return subchunks
94
 
95
 
96
- import re
97
 
98
  TOPIC_RULES = {
99
  "gpu": ["gpu", "graphics card", "cuda", "vram", "nvidia"],
@@ -123,25 +143,9 @@ def tag_topics(text: str) -> list[str]:
123
  return list(tags)
124
 
125
 
126
- from llama_index.core import Settings, VectorStoreIndex
127
- from llama_index.core.schema import TextNode
128
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
129
- from llama_index.llms.openai import OpenAI
130
 
131
- Settings.embed_model = HuggingFaceEmbedding(
132
- model_name="sentence-transformers/all-MiniLM-L6-v2"
133
- )
134
 
135
 
136
- Settings.llm = OpenAI(
137
- api_key= os.environ.get("OPENAI_API_KEY"),
138
- base_url= os.environ.get("OPENAI_API_BASE")
139
-
140
- )
141
-
142
- # OPENAI_API_KEY = os.environ.get("API_KEY")
143
- # OPENAI_API_BASE = os.environ.get("API_BASE")
144
-
145
 
146
 
147
 
@@ -162,8 +166,7 @@ def build_nodes(subchunks):
162
  return nodes
163
 
164
 
165
- from llama_index.retrievers.bm25 import BM25Retriever
166
- from llama_index.core.retrievers import QueryFusionRetriever
167
 
168
  def build_hybrid_retriever(nodes):
169
  index = VectorStoreIndex(nodes)
@@ -205,7 +208,7 @@ def infer_required_topics(q: str) -> set[str]:
205
  return req
206
 
207
 
208
- from sentence_transformers import CrossEncoder
209
 
210
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
211
 
 
1
  import nltk
2
  import os, json
3
+ #from dotenv import load_dotenv
4
+
5
  nltk.download("punkt_tab")
6
 
7
  RETRIEVER = None
 
9
  import gradio as gr
10
  import nltk
11
  from typing import List
12
+ from nltk.tokenize import sent_tokenize
13
+ from dataclasses import dataclass
14
+ import re
15
 
16
+ from sentence_transformers import CrossEncoder
17
+ from llama_index.retrievers.bm25 import BM25Retriever
18
+ from llama_index.core.retrievers import QueryFusionRetriever
19
 
20
+ from llama_index.core import Settings, VectorStoreIndex
21
+ from llama_index.core.schema import TextNode
22
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
23
+ from llama_index.llms.openai import OpenAI
24
 
25
+
26
+
27
+
28
+ Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
+
30
+
31
+ Settings.llm = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url=os.environ.get("OPENAI_API_BASE"))
32
+
33
+ # OPENAI_API_KEY = os.environ.get("API_KEY")
34
+ # OPENAI_API_BASE = os.environ.get("API_BASE")
35
 
36
  @dataclass
37
  class Utterance:
 
72
 
73
 
74
 
75
+
76
 
77
  def build_subchunks(
78
  utterances,
 
113
  return subchunks
114
 
115
 
116
+
117
 
118
  TOPIC_RULES = {
119
  "gpu": ["gpu", "graphics card", "cuda", "vram", "nvidia"],
 
143
  return list(tags)
144
 
145
 
 
 
 
 
146
 
 
 
 
147
 
148
 
 
 
 
 
 
 
 
 
 
149
 
150
 
151
 
 
166
  return nodes
167
 
168
 
169
+
 
170
 
171
  def build_hybrid_retriever(nodes):
172
  index = VectorStoreIndex(nodes)
 
208
  return req
209
 
210
 
211
+
212
 
213
# Cross-encoder reranker: scores (query, passage) pairs jointly so retrieved
# chunks can be re-ordered by relevance after hybrid retrieval.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
214