Gabriele Tuccio committed on
Commit
62cdc78
·
1 Parent(s): 641d64f
Files changed (2) hide show
  1. app.py +83 -0
  2. requirements.txt +114 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ from supabase import create_client
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
+ import gradio as gr
8
+
9
+ # Configura Supabase
10
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
11
+ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
12
+
13
+ EMBEDDING_MODEL = "all-MiniLM-L6-v2"
14
+
15
+ # Crea client per Supabase
16
+ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
17
+
18
+ # Carica il modello per gli embedding
19
+ model = SentenceTransformer(EMBEDDING_MODEL)
20
+
21
# Fetch the paper corpus from Supabase.
def load_database(supabase_client):
    """Fetch every row of the 'papers' table and return it as a DataFrame.

    Args:
        supabase_client: a Supabase client exposing .table(...).select(...).execute().

    Returns:
        pandas.DataFrame with one row per paper (columns as stored in Supabase).
    """
    rows = supabase_client.table("papers").select("*").execute().data
    return pd.DataFrame(rows)
30
+
31
# Build the vector-search index.
def create_faiss_index(embeddings):
    """Build a flat (exact) L2-distance FAISS index over the given vectors.

    Args:
        embeddings: sequence of equal-length numeric vectors.

    Returns:
        faiss.IndexFlatL2 populated with the vectors, ready for .search().
    """
    vectors = np.array(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])  # exact L2 search, no training needed
    index.add(vectors)
    return index
37
+
38
# Run a semantic search over the papers table.
def search_papers(query, supabase_client):
    """Return up to the 3 papers most similar to *query* as (title, url) pairs.

    Args:
        query: free-text search string.
        supabase_client: Supabase client passed through to load_database().

    Returns:
        list of (title, url) tuples, nearest first; at most 3 entries.

    NOTE(review): the full table is re-fetched and the FAISS index rebuilt on
    every call — consider caching both if the corpus is non-trivial.
    """
    df = load_database(supabase_client)
    index = create_faiss_index(df["embedding"].tolist())

    # Embed the query as a single-row float32 matrix, as FAISS expects.
    query_embedding = model.encode(query).reshape(1, -1).astype(np.float32)
    _, indices = index.search(query_embedding, 3)  # top-3 nearest neighbours

    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # df.iloc[-1] would silently return the LAST row, so drop invalid ids.
    valid = [int(i) for i in indices[0] if i >= 0]
    results = df.iloc[valid]
    return [(row["title"], row["url"]) for _, row in results.iterrows()]
50
+
51
+
52
# Build the HTML result list for the Gradio UI.
def gradio_interface(query):
    """Gradio callback: search for *query* and render the hits as an HTML list.

    Args:
        query: free-text search string from the textbox.

    Returns:
        str: an HTML <ul> of linked paper titles.
    """
    import html  # stdlib; local import to keep the top of the file unchanged

    results = search_papers(query, supabase)
    html_output = "<ul style='list-style-type: none; padding-left: 0;'>"

    for title, url in results:
        # Escape DB-supplied values so a stray quote or tag in a title/URL
        # cannot break the markup or inject script into the page.
        safe_url = html.escape(url, quote=True)
        safe_title = html.escape(title)
        html_output += f"""
        <li style="margin-bottom: 15px;">
            <a href='{safe_url}' target='_blank' style='text-decoration: none; color: #007bff; font-size: 18px; font-weight: bold;'>
                {safe_title}
            </a>
        </li>
        """

    html_output += "</ul>"

    return html_output
69
+
70
# Gradio interface definition.
# NOTE(review): live=True re-runs the search (full table fetch + FAISS index
# rebuild) on every keystroke — confirm this is acceptable for the corpus size.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Inserisci la tua query di ricerca", placeholder="Es. 'Deep Learning for NLP'", lines=1),
    outputs=gr.HTML(label="Articoli correlati", elem_id="output-section"),
    live=True,
    title="Ricerca articoli arXiv",
    description="Inserisci una query per trovare articoli correlati.",
)


# Launch the UI only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohappyeyeballs==2.4.8
3
+ aiohttp==3.11.13
4
+ aiosignal==1.3.2
5
+ altair==5.5.0
6
+ annotated-types==0.7.0
7
+ anyio==4.8.0
8
+ arxiv==2.1.3
9
+ async-timeout==5.0.1
10
+ attrs==25.1.0
11
+ blinker==1.9.0
12
+ cachetools==5.5.2
13
+ certifi==2025.1.31
14
+ charset-normalizer==3.4.1
15
+ click==8.1.8
16
+ deprecation==2.1.0
17
+ exceptiongroup==1.2.2
18
+ faiss-cpu==1.10.0
19
+ fastapi==0.115.11
20
+ feedparser==6.0.11
21
+ ffmpy==0.5.0
22
+ filelock==3.17.0
23
+ frozenlist==1.5.0
24
+ fsspec==2025.2.0
25
+ gitdb==4.0.12
26
+ GitPython==3.1.44
27
+ gotrue==2.11.4
28
+ gradio==5.20.0
29
+ gradio_client==1.7.2
30
+ groovy==0.1.2
31
+ h11==0.14.0
32
+ h2==4.2.0
33
+ hpack==4.1.0
34
+ httpcore==1.0.7
35
+ httpx==0.28.1
36
+ huggingface-hub==0.29.1
37
+ hyperframe==6.1.0
38
+ idna==3.10
39
+ Jinja2==3.1.5
40
+ joblib==1.4.2
41
+ jsonschema==4.23.0
42
+ jsonschema-specifications==2024.10.1
43
+ markdown-it-py==3.0.0
44
+ MarkupSafe==2.1.5
45
+ mdurl==0.1.2
46
+ mpmath==1.3.0
47
+ multidict==6.1.0
48
+ narwhals==1.29.0
49
+ networkx==3.4.2
50
+ numpy==2.2.3
51
+ orjson==3.10.15
52
+ packaging==24.2
53
+ pandas==2.2.3
54
+ pillow==11.1.0
55
+ postgrest==0.19.3
56
+ propcache==0.3.0
57
+ protobuf==5.29.3
58
+ psycopg2-binary==2.9.10
59
+ pyaes==1.6.1
60
+ pyarrow==19.0.1
61
+ pyasn1==0.6.1
62
+ pydantic==2.10.6
63
+ pydantic_core==2.27.2
64
+ pydeck==0.9.1
65
+ pydub==0.25.1
66
+ Pygments==2.19.1
67
+ PyMuPDF==1.25.3
68
+ python-dateutil==2.9.0.post0
69
+ python-multipart==0.0.20
70
+ pytz==2025.1
71
+ PyYAML==6.0.2
72
+ realtime==2.4.1
73
+ referencing==0.36.2
74
+ regex==2024.11.6
75
+ requests==2.32.3
76
+ rich==13.9.4
77
+ rpds-py==0.23.1
78
+ rsa==4.9
79
+ ruff==0.9.9
80
+ safehttpx==0.1.6
81
+ safetensors==0.5.3
82
+ scikit-learn==1.6.1
83
+ scipy==1.15.2
84
+ semantic-version==2.10.0
85
+ sentence-transformers==3.4.1
86
+ sgmllib3k==1.0.0
87
+ shellingham==1.5.4
88
+ six==1.17.0
89
+ smmap==5.0.2
90
+ sniffio==1.3.1
91
+ starlette==0.46.0
92
+ storage3==0.11.3
93
+ streamlit==1.42.2
94
+ StrEnum==0.4.15
95
+ supabase==2.13.0
96
+ supafunc==0.9.3
97
+ sympy==1.13.1
98
+ Telethon==1.39.0
99
+ tenacity==9.0.0
100
+ threadpoolctl==3.5.0
101
+ tokenizers==0.21.0
102
+ toml==0.10.2
103
+ tomlkit==0.13.2
104
+ torch==2.6.0
105
+ tornado==6.4.2
106
+ tqdm==4.67.1
107
+ transformers==4.49.0
108
+ typer==0.15.2
109
+ typing_extensions==4.12.2
110
+ tzdata==2025.1
111
+ urllib3==2.3.0
112
+ uvicorn==0.34.0
113
+ websockets==14.2
114
+ yarl==1.18.3