# Page banner captured along with this file (not part of the program): "Spaces: Runtime error".
import html
import http.client as http_client
import json
import logging
import os
import re
import string

import gradio as gr
import requests


def mark_tokens_bold(string, tokens):
    """Return ``string`` as an HTML fragment with every token highlighted.

    The input is treated as plain text. It is scanned once for all tokens;
    each match is wrapped in a pink, bold ``<span>``, and both the matches and
    the text between them are HTML-escaped so that markup characters in the
    text (``<``, ``>``, ``&``, quotes) are displayed literally.

    Args:
        string: Plain text to highlight. (The parameter name shadows the
            ``string`` module inside this function; it is kept for interface
            compatibility.)
        tokens: Iterable of literal substrings to highlight. Matching is
            case-sensitive and not restricted to word boundaries. Empty
            tokens and duplicates are ignored; ``None`` is treated as empty.

    Returns:
        The escaped text with each token occurrence wrapped in
        ``<span style='color: #ff75b3;'><b>…</b></span>``.
    """
    open_tag = "<span style='color: #ff75b3;'><b>"
    close_tag = "</b></span>"

    # Longest tokens first so that, where one token is a prefix of another,
    # the alternation prefers the longer match. Empty tokens are dropped
    # because an empty pattern matches at every position.
    unique_tokens = sorted(
        dict.fromkeys(token for token in (tokens or ()) if token),
        key=len,
        reverse=True,
    )
    if not unique_tokens:
        return html.escape(string)

    pattern = re.compile("|".join(re.escape(token) for token in unique_tokens))

    # Build the output from slices instead of using re.sub with a replacement
    # template: backslashes in a token are then never interpreted as escapes,
    # and a single pass over the original text means tokens such as "b" or
    # "span" cannot match inside markup inserted for an earlier token.
    pieces = []
    cursor = 0
    for match in pattern.finditer(string):
        pieces.append(html.escape(string[cursor:match.start()]))
        pieces.append(open_tag + html.escape(match.group(0)) + close_tag)
        cursor = match.end()
    pieces.append(html.escape(string[cursor:]))
    return "".join(pieces)
def process_results(results, highlight_terms):
    """Render search results as one HTML fragment.

    Args:
        results: Sequence of mappings, each read for the keys ``"text"``,
            ``"repo_license"`` (an iterable of strings, joined with
            ``" | "``), ``"repo_name"`` and ``"repo_path"``. ``None`` or an
            empty sequence yields the "No results retrieved." message.
        highlight_terms: Terms passed to ``mark_tokens_bold`` to highlight
            inside each result's text.

    Returns:
        The concatenated HTML of all result entries, or the "no results"
        message when there is nothing to show.
    """
    if not results:
        return """<br><p>No results retrieved.</p><br><hr>"""

    entry_template = """\
<p style='font-size:16px; text-align: left; color: white;'>Repository name: <span style='color: #727cd6;'>{}</span></p>
<p style='font-size:16px; text-align: left; color: white;'>Repository path: <span style='color: #727cd6;'>{}</span></p>
<p style='font-size:16px; text-align: left; color: white;'>Repository licenses: <span style='color: #727cd6;'>{}</span></p>
<br>
<pre style='height: 600px; overflow-y: scroll; overflow-x: hidden; color: #d9d9d9;border: 1px solid #ff75b3; padding: 10px'><code>{}</code></pre>
<br>
<hr>
<br>
"""

    entries = []
    for result in results:
        # The snippet is inserted exactly as returned by mark_tokens_bold: it
        # carries the highlight markup, so it cannot be escaped at this point
        # without also escaping those tags.
        snippet_html = mark_tokens_bold(result["text"], highlight_terms)
        licenses = " | ".join(result["repo_license"])
        # Repository metadata comes from the search backend and is
        # interpolated into markup, so escape it to keep characters such as
        # "<" or "&" from being interpreted as HTML.
        entries.append(
            entry_template.format(
                html.escape(str(result["repo_name"])),
                html.escape(str(result["repo_path"])),
                html.escape(licenses),
                snippet_html,
            )
        )
    return "".join(entries)
def scisearch(query, language, num_results=10):
    """Send ``query`` to the search backend and return the results as HTML.

    The backend URL is read from the ``address`` environment variable. The
    request body is JSON of the form ``{"query": ..., "k": ...}`` and the
    response is expected to be a JSON object with ``"results"`` and
    ``"highlight_terms"`` entries, which are handed to ``process_results``.

    Args:
        query: Free-text query. Runs of whitespace are collapsed to single
            spaces before sending. ``None`` or a blank query returns ``""``
            without contacting the backend.
        language: Accepted for interface compatibility; not used by this
            function.
        num_results: Maximum number of results to request (sent as ``"k"``).

    Returns:
        The HTML produced by ``process_results``, or ``""`` for an empty
        query.

    Raises:
        ValueError: If the ``address`` environment variable is not set.
        requests.HTTPError: If the backend answers with an error status.
        KeyError: If the response lacks ``"results"`` or
            ``"highlight_terms"``.
    """
    # Check for None before calling any str method on the query.
    if query is None:
        return ""
    query = " ".join(query.split())
    if not query:
        return ""

    address = os.environ.get("address")
    if not address:
        # Fail with an explicit message instead of letting requests complain
        # about the URL "None".
        raise ValueError("environment variable 'address' (search backend URL) is not set")

    post_data = {"query": query, "k": num_results}
    response = requests.post(
        address,
        headers={"Content-type": "application/json"},
        data=json.dumps(post_data),
        timeout=60,
    )
    # Surface HTTP errors directly rather than as a JSON decoding failure on
    # an error page.
    response.raise_for_status()
    payload = response.json()
    return process_results(payload["results"], payload["highlight_terms"])
# Markdown/HTML banner shown at the top of the page (rendered by gr.Markdown).
# NOTE(review): the two "π" glyphs look like mis-decoded characters (possibly
# emoji) — confirm against the original file before changing them.
# NOTE(review): the link target is the placeholder "todo" and the link text
# says "IceCoder" while the heading says "SantaCoder" — confirm which is meant.
# NOTE(review): "In that case the code requires" has no object; the sentence
# appears to be missing words — confirm the intended wording.
description = (
    "# <p style=\"text-align: center; color: white;\">"
    "<span style='color: #ff75b3;'>π SantaCoder:</span> Dataset Search π </p>\n"
    "<span style='color: white;'>When you use "
    "<a href=\"todo\" style=\"color: #ff75b3;\">IceCoder</a>"
    " to generate code it might produce exact copies of code in the pretraining dataset."
    " In that case the code requires\n"
    "and with this search tool we aim to provide help to finding out where the code came from.</span>"
)
if __name__ == "__main__":
    # Build the single-page UI: banner, query box, result-count slider,
    # submit button and an HTML panel for the rendered results.
    demo = gr.Blocks(
        css=".gradio-container {background-color: #20233fff; color:white}"
    )

    with demo:
        with gr.Row():
            gr.Markdown(value=description)
        with gr.Row():
            query = gr.Textbox(lines=5, placeholder="Type your query here...", label="Query")
        with gr.Row():
            k = gr.Slider(1, 100, value=10, step=1, label="Max Results")
        with gr.Row():
            submit_btn = gr.Button("Submit")
        with gr.Row():
            results = gr.HTML(label="Results", value="<img src='https://huggingface.co/datasets/bigcode/admin/resolve/main/bigcode_contact.png' alt='contact' style='display: block; margin: auto; max-width: 800px;'>")

        def submit(query, k, lang="en"):
            """Run a search for the textbox value and update the results panel.

            Returns a mapping from the ``results`` component to its new HTML.
            A missing or blank query clears the panel without calling the
            backend.
            """
            # Test for None before stripping, and return exactly one value:
            # both event handlers below declare a single output component.
            if query is None or not query.strip():
                return {results: ""}
            return {
                results: scisearch(query.strip(), lang, k),
            }

        # Pressing Enter in the textbox and clicking the button do the same thing.
        query.submit(fn=submit, inputs=[query, k], outputs=[results])
        submit_btn.click(submit, inputs=[query, k], outputs=[results])

    # Enable request queuing through Blocks.queue(); the enable_queue argument
    # of launch() is deprecated in favour of this method.
    demo.queue()
    demo.launch(debug=True)