Spaces:

aquibmoin
/

Semantic-Search-with-IndusST

Sleeping

App Files Files Community

aquibmoin committed on Jul 6, 2024

Commit

78db47d

verified ·

1 Parent(s): 35e4eff

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import requests
+import os
+import re
+# Access token for the inference endpoint, read from the environment
+# (None when the API_TOKEN variable is not set).
+API_TOKEN = os.getenv('API_TOKEN')
+# Hosted inference endpoint for the nasa-impact/nasa-smd-ibm-st-v2 model.
+API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
+# Only attach an Authorization header when a token is configured; otherwise
+# every request would carry the literal string "Bearer None".
+headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
+def query_similarity(source_sentence, sentences, timeout=60):
+    """POST a sentence-similarity request to API_URL and return the decoded JSON.
+
+    Args:
+        source_sentence: Text that every candidate is compared against.
+        sentences: List of candidate texts to score.
+        timeout: Seconds to wait for the server before giving up. Without a
+            timeout, ``requests`` would block indefinitely on a stalled
+            connection.
+
+    Returns:
+        The JSON-decoded response body, passed through unchanged (including
+        any error payload the server sends).
+
+    Raises:
+        requests.exceptions.Timeout: The server did not answer within
+            ``timeout`` seconds.
+    """
+    payload = {
+        "inputs": {
+            "source_sentence": source_sentence,
+            "sentences": sentences
+        }
+    }
+    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
+    return response.json()
+def format_output(response):
+    """Render similarity results as one line per sentence, best score first.
+
+    Args:
+        response: Either a list of dicts, each with a ``'sentence'`` and a
+            ``'score'`` key, or a dict containing an ``'error'`` key.
+
+    Returns:
+        Newline-joined ``"Sentence: ..., Score: ..."`` lines sorted by
+        descending score (scores shown with four decimals), an empty string
+        for an empty list, or ``"Error: <message>"`` for an error payload.
+    """
+    # An error payload is a dict, not a result list; iterating it would walk
+    # its string keys and fail on the 'score' lookup, so report it instead.
+    if isinstance(response, dict) and "error" in response:
+        return f"Error: {response['error']}"
+    ranked = sorted(response, key=lambda item: item['score'], reverse=True)
+    return "\n".join(
+        f"Sentence: {item['sentence']}, Score: {item['score']:.4f}"
+        for item in ranked
+    )
+def split_into_chunks(text, chunk_size=100):
+    """Group the sentences of ``text`` into chunks of about ``chunk_size`` words.
+
+    Sentences are split after ``.``, ``!`` or ``?`` followed by spaces and are
+    never broken up: a single sentence longer than ``chunk_size`` words becomes
+    a chunk of its own.
+
+    Args:
+        text: The document text.
+        chunk_size: Maximum number of whitespace-separated words per chunk
+            (exceeded only by a single over-long sentence).
+
+    Returns:
+        A list of non-empty chunk strings in document order; ``[]`` when the
+        text contains no words.
+    """
+    sentences = re.split(r'(?<=[.!?]) +', text)  # Split text into sentences
+    chunks = []
+    current_chunk = []
+    current_length = 0
+    for sentence in sentences:
+        sentence_length = len(sentence.split())
+        if sentence_length == 0:
+            # Empty or whitespace-only fragments (e.g. from empty input)
+            # carry no content and must not produce blank chunks.
+            continue
+        # Flush only when there is something buffered; otherwise an over-long
+        # first sentence would emit an empty chunk ahead of itself.
+        if current_chunk and current_length + sentence_length > chunk_size:
+            chunks.append(" ".join(current_chunk))
+            current_chunk = []
+            current_length = 0
+        current_chunk.append(sentence)
+        current_length += sentence_length
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+    return chunks
+def semantic_search(query, document):
+    """Rank the chunks of an uploaded document by similarity to ``query``.
+
+    Args:
+        query: The search text.
+        document: The uploaded document. Accepted forms are ``None`` (nothing
+            uploaded), an object exposing the file path via ``.name``, a path
+            string, a readable binary file object, or the raw text itself.
+
+    Returns:
+        The formatted ranking produced by ``format_output``, or a short
+        message when there is no document or it contains no text.
+    """
+    if document is None:
+        return "Please upload a .txt document to search."
+    # NOTE(review): the File input is assumed to hand over either a temp-file
+    # wrapper (path in .name) or a plain path string - confirm for the Gradio
+    # version in use.
+    path = getattr(document, "name", document)
+    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
+        with open(path, "rb") as handle:
+            text = read_file(handle)
+    elif isinstance(document, str):
+        # Not a path on disk: treat the string as the document text itself.
+        text = document
+    else:
+        text = read_file(document)
+    # Blank chunks carry nothing to compare, so keep them out of the request.
+    chunks = [chunk for chunk in split_into_chunks(text) if chunk.strip()]
+    if not chunks:
+        return "The document contains no text to search."
+    response = query_similarity(query, chunks)
+    # NOTE(review): the hosted sentence-similarity task is documented as
+    # returning a bare list of scores, one per input sentence; pair each score
+    # with its chunk so format_output receives the records it expects.
+    if (
+        isinstance(response, list)
+        and response
+        and all(isinstance(score, (int, float)) for score in response)
+    ):
+        response = [
+            {"sentence": chunk, "score": score}
+            for chunk, score in zip(chunks, response)
+        ]
+    return format_output(response)
+def read_file(file):
+    """Return the text content of ``file``, decoding bytes as UTF-8.
+
+    Args:
+        file: A filesystem path (``str`` or ``os.PathLike``), or an object
+            whose ``read()`` returns either ``bytes`` or ``str``.
+
+    Returns:
+        The file content as a string.
+
+    Raises:
+        UnicodeDecodeError: The content is not valid UTF-8.
+    """
+    if isinstance(file, (str, os.PathLike)):
+        with open(file, encoding='utf-8') as handle:
+            return handle.read()
+    data = file.read()
+    # Binary handles yield bytes; text-mode handles already yield str.
+    if isinstance(data, bytes):
+        return data.decode('utf-8')
+    return data
+# Build the two input widgets first, then wire them into the interface.
+query_input = gr.Textbox(lines=2, placeholder="Enter your query here...")
+document_input = gr.File(label="Upload a .txt file")
+# Single-function UI: (query, uploaded document) -> plain-text ranking.
+iface = gr.Interface(
+    fn=semantic_search,
+    inputs=[query_input, document_input],
+    outputs="text",
+    title="Document Semantic Search",
+    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences.",
+    examples=[["Enter a sample query here...", None]],
+)
+# Start serving the app as soon as the module is executed.
+iface.launch()