Spaces:
Sleeping
Sleeping
first step many corpora
Browse files
- .gitignore +1 -0
- app.py +13 -4
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.code-workspace
|
app.py
CHANGED
|
@@ -66,6 +66,14 @@ st.sidebar.markdown(
|
|
| 66 |
unsafe_allow_html=True,
|
| 67 |
)
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
query = st.sidebar.text_input(label="Search query", value="")
|
| 70 |
language = st.sidebar.selectbox(
|
| 71 |
"Language",
|
|
@@ -116,18 +124,20 @@ footer = """
|
|
| 116 |
st.sidebar.markdown(footer, unsafe_allow_html=True)
|
| 117 |
|
| 118 |
|
| 119 |
-
def scisearch(query, language, num_results=10):
|
| 120 |
try:
|
| 121 |
query = query.strip()
|
| 122 |
if query == "" or query is None:
|
| 123 |
return
|
| 124 |
|
|
|
|
|
|
|
| 125 |
post_data = {"query": query, "k": num_results}
|
| 126 |
if language != "detect_language":
|
| 127 |
post_data["lang"] = language
|
| 128 |
|
| 129 |
output = requests.post(
|
| 130 |
-
os.environ.get("address"),
|
| 131 |
headers={"Content-type": "application/json"},
|
| 132 |
data=json.dumps(post_data),
|
| 133 |
timeout=60,
|
|
@@ -143,7 +153,6 @@ def scisearch(query, language, num_results=10):
|
|
| 143 |
Detected language <b>{detected_lang}</b> is not supported.<br>
|
| 144 |
Please choose a language from the dropdown or type another query.
|
| 145 |
</p><br><hr><br>"""
|
| 146 |
-
|
| 147 |
results = payload["results"]
|
| 148 |
highlight_terms = payload["highlight_terms"]
|
| 149 |
except Exception as e:
|
|
@@ -206,7 +215,7 @@ def process_results(hits: list, highlight_terms: list) -> str:
|
|
| 206 |
|
| 207 |
|
| 208 |
if st.sidebar.button("Search"):
|
| 209 |
-
hits, highlight_terms = scisearch(query, LANG_MAPPING[language], max_results)
|
| 210 |
html_results = process_results(hits, highlight_terms)
|
| 211 |
rendered_results = f"""
|
| 212 |
<div id="searchresultsarea">
|
|
|
|
| 66 |
unsafe_allow_html=True,
|
| 67 |
)
|
| 68 |
|
| 69 |
+
corpus = st.sidebar.selectbox(
|
| 70 |
+
"Corpus",
|
| 71 |
+
(
|
| 72 |
+
"LAION",
|
| 73 |
+
"C4",
|
| 74 |
+
),
|
| 75 |
+
index=0,
|
| 76 |
+
)
|
| 77 |
query = st.sidebar.text_input(label="Search query", value="")
|
| 78 |
language = st.sidebar.selectbox(
|
| 79 |
"Language",
|
|
|
|
| 124 |
st.sidebar.markdown(footer, unsafe_allow_html=True)
|
| 125 |
|
| 126 |
|
| 127 |
+
def scisearch(query, corpus, language, num_results=10):
|
| 128 |
try:
|
| 129 |
query = query.strip()
|
| 130 |
if query == "" or query is None:
|
| 131 |
return
|
| 132 |
|
| 133 |
+
corpus = corpus.strip()
|
| 134 |
+
address = os.environ.get("address") if corpus == "LAION" else os.environ.get("address")
|
| 135 |
post_data = {"query": query, "k": num_results}
|
| 136 |
if language != "detect_language":
|
| 137 |
post_data["lang"] = language
|
| 138 |
|
| 139 |
output = requests.post(
|
| 140 |
+
address, # os.environ.get("address"),
|
| 141 |
headers={"Content-type": "application/json"},
|
| 142 |
data=json.dumps(post_data),
|
| 143 |
timeout=60,
|
|
|
|
| 153 |
Detected language <b>{detected_lang}</b> is not supported.<br>
|
| 154 |
Please choose a language from the dropdown or type another query.
|
| 155 |
</p><br><hr><br>"""
|
|
|
|
| 156 |
results = payload["results"]
|
| 157 |
highlight_terms = payload["highlight_terms"]
|
| 158 |
except Exception as e:
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
if st.sidebar.button("Search"):
|
| 218 |
+
hits, highlight_terms = scisearch(query, corpus, LANG_MAPPING[language], max_results)
|
| 219 |
html_results = process_results(hits, highlight_terms)
|
| 220 |
rendered_results = f"""
|
| 221 |
<div id="searchresultsarea">
|