Francogv committed on
Commit
e0c81b9
verified
1 Parent(s): 2b0a008

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. api_semantica.py +51 -0
  3. base_semantica.json +3 -0
  4. requirements.txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ base_semantica.json filter=lfs diff=lfs merge=lfs -text
api_semantica.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import numpy as np
5
+ import json
6
+ import uvicorn
7
+
8
# Application instance that the route decorator further down registers on.
app = FastAPI()

# Load the sentence-embedding model once, at import time.
modelo = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)

# Load the embedding base once, at import time. The path is relative, so it
# is resolved against the process's current working directory.
with open("base_semantica.json", encoding="utf-8") as f:
    base = json.load(f)
16
+
17
+ # Pydantic schema para entrada
18
class PreguntaInput(BaseModel):
    """Request body accepted by the ``/buscar`` endpoint."""

    # Question text to search for.
    pregunta: str
    # Number of results requested; 3 when the field is omitted.
    top_k: int = 3
21
+
22
+ # Función de búsqueda
23
def buscar_semanticamente(pregunta, top_k=3):
    """Return the ``top_k`` entries of ``base`` most similar to ``pregunta``.

    The question is encoded with the module-level ``modelo`` and compared by
    cosine similarity against the ``"embedding"`` value of every entry in the
    module-level ``base``.

    Args:
        pregunta: Text to search for; passed unchanged to ``modelo.encode``.
        top_k: Maximum number of results to return.

    Returns:
        A list of ``(score, item)`` tuples ordered by descending score, where
        ``score`` is a float and ``item`` is the matching entry of ``base``.
        The list is empty when ``top_k`` is not positive or ``base`` has no
        entries.
    """
    import heapq

    # A negative top_k used to reach the final slice and silently return
    # "all but the last |top_k|" entries; treat any non-positive request as
    # "no results". Checking top_k first also skips the encode call.
    if top_k <= 0 or not base:
        return []

    emb_pregunta = modelo.encode(pregunta)

    # Score the whole base with a single cos_sim call instead of one call
    # (and one tensor conversion) per entry.
    # NOTE(review): assumes every entry's embedding has the same length.
    matriz = np.asarray([item["embedding"] for item in base], dtype=np.float32)
    scores = util.cos_sim(emb_pregunta, matriz)[0].tolist()

    # nlargest is equivalent to a stable descending sort followed by a slice,
    # so entries with equal scores keep their order from base.
    return heapq.nlargest(top_k, zip(scores, base), key=lambda par: par[0])
32
+
33
+ # Endpoint principal
34
@app.post("/buscar")
async def buscar(input: PreguntaInput):
    """Handle ``POST /buscar``: run the semantic search and shape the reply.

    Args:
        input: Request body carrying the question text and the number of
            results wanted.

    Returns:
        A dict with the original question under ``"pregunta"`` and, under
        ``"resultados"``, one dict per match holding its score rounded to
        four decimals plus the entry's ``titulo``, ``url`` and ``texto``.
    """
    coincidencias = buscar_semanticamente(input.pregunta, input.top_k)

    resultados = []
    for score, item in coincidencias:
        resultados.append(
            {
                "score": round(score, 4),
                "titulo": item["titulo"],
                "url": item["url"],
                "texto": item["texto"],
            }
        )

    return {"pregunta": input.pregunta, "resultados": resultados}
48
+
49
+ # Para correr en Hugging Face
50
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
base_semantica.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5758155fccf9c8e753e8fba10980900d78847bb5edfe09ea430ab5dcfbddcb6c
3
+ size 169259490
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ numpy
4
+ sentence-transformers
5
+ torch
6
+ transformers
7
+ pydantic