Spaces:

vickyvigneshmass
/

test

Runtime error

App Files Files Community

vickyvigneshmass committed on Apr 7, 2025

Commit

836cff2

verified ·

1 Parent(s): a5faf3c

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -3

app.py CHANGED Viewed

@@ -1,7 +1,84 @@
-from fastapi import FastAPI
 app = FastAPI()
 @app.get("/")
-def greet_json():
-    return {"welcome": "Created!"}

+from fastapi import FastAPI, Query, UploadFile, File
+from sentence_transformers import SentenceTransformer, util
+import torch
+import pickle
+import os
+# FastAPI application object; the route decorators in this module register handlers on it
 app = FastAPI()
+# Module-level configuration and shared state read (and in places reassigned) by the handlers
+MODEL_NAME = 'all-MiniLM-L6-v2'  # name passed to SentenceTransformer; reassigned by /load_model
+EMBEDDING_CACHE = 'embeddings_cache.pkl'  # pickle file holding the (sentences, embeddings) pair
+DOCUMENT_PATH = 'test.txt'  # UTF-8 text file whose non-blank lines are embedded
+model = SentenceTransformer(MODEL_NAME)  # constructed once, at import time
+sentences = []  # placeholder; replaced when the cache or the document is loaded
+sentence_embeddings = None  # placeholder; replaced together with `sentences`
+# Function to load and encode document
+def load_and_encode_document(file_path):
+    with open(file_path, "r", encoding="utf-8") as f:
+        document_text = f.read()
+    sents = [line.strip() for line in document_text.split('\n') if line.strip()]
+    embs = model.encode(sents, convert_to_tensor=True)
+    return sents, embs
+# Load embeddings if cached
+if os.path.exists(EMBEDDING_CACHE):
+    with open(EMBEDDING_CACHE, 'rb') as f:
+        sentences, sentence_embeddings = pickle.load(f)
+else:
+    sentences, sentence_embeddings = load_and_encode_document(DOCUMENT_PATH)
+    with open(EMBEDDING_CACHE, 'wb') as f:
+        pickle.dump((sentences, sentence_embeddings), f)
 @app.get("/")
+def welcome():
+    return {"message": "Document Retrieval Service is Running!"}
+@app.get("/search")
+def search_text(
+    text: str = Query(..., description="Enter your query"),
+    top_k: int = Query(5, description="Number of top matches to return"),
+    threshold: float = Query(0.5, description="Minimum similarity score threshold")
+):
+    query_embedding = model.encode(text, convert_to_tensor=True)
+    scores = util.cos_sim(query_embedding, sentence_embeddings)[0]
+    top_results = torch.topk(scores, k=top_k)
+    results = []
+    for idx in top_results.indices:
+        score = scores[idx].item()
+        if score >= threshold:
+            results.append({
+                "matched_sentence": sentences[idx],
+                "similarity_score": round(score, 3)
+            })
+    return {
+        "query": text,
+        "top_matches": results or "No relevant matches found above threshold."
+    }
+@app.post("/upload")
+async def upload_file(file: UploadFile = File(...)):
+    content = await file.read()
+    text = content.decode("utf-8")
+    with open(DOCUMENT_PATH, "w", encoding="utf-8") as f:
+        f.write(text)
+    global sentences, sentence_embeddings
+    sentences, sentence_embeddings = load_and_encode_document(DOCUMENT_PATH)
+    with open(EMBEDDING_CACHE, 'wb') as f:
+        pickle.dump((sentences, sentence_embeddings), f)
+    return {"message": f"File '{file.filename}' uploaded and processed successfully."}
+@app.post("/load_model")
+def load_model(model_name: str = Query(..., description="HuggingFace model name to load")):
+    global model, sentences, sentence_embeddings, MODEL_NAME
+    MODEL_NAME = model_name
+    model = SentenceTransformer(model_name)
+    sentences, sentence_embeddings = load_and_encode_document(DOCUMENT_PATH)
+    with open(EMBEDDING_CACHE, 'wb') as f:
+        pickle.dump((sentences, sentence_embeddings), f)
+    return {"message": f"Model '{model_name}' loaded and document re-embedded successfully."}