File size: 2,253 Bytes
96c97be
df04ad4
96c97be
 
 
 
 
 
 
df04ad4
 
96c97be
df04ad4
 
96c97be
df04ad4
96c97be
a519a55
df04ad4
 
 
a519a55
96c97be
a519a55
 
df04ad4
 
a519a55
df04ad4
a519a55
df04ad4
a519a55
df04ad4
a519a55
 
96c97be
df04ad4
96c97be
df04ad4
96c97be
df04ad4
 
96c97be
df04ad4
96c97be
 
df04ad4
96c97be
df04ad4
a519a55
96c97be
df04ad4
96c97be
df04ad4
 
 
 
96c97be
df04ad4
96c97be
df04ad4
96c97be
df04ad4
96c97be
df04ad4
96c97be
df04ad4
96c97be
 
df04ad4
96c97be
 
df04ad4
96c97be
df04ad4
a519a55
df04ad4
a519a55
df04ad4
 
 
96c97be
df04ad4
 
96c97be
df04ad4
96c97be
df04ad4
96c97be
df04ad4
a519a55
df04ad4
 
96c97be
df04ad4
 
a519a55
df04ad4
a519a55
df04ad4
 
96c97be
df04ad4
 
 
96c97be
df04ad4
 
 
 
96c97be
df04ad4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import streamlit as st
import numpy as np
import faiss

from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from openai import OpenAI

# Page configuration goes first: Streamlit expects set_page_config to run
# before any other st.* call, and the API-key check below may call st.error.
st.set_page_config(page_title="Professor de Medicina IA")
st.title("🧠 Professor de Medicina IA")

# OpenAI API key, read from the environment. A missing key is reported in the
# page and the script run is halted instead of crashing with a raw KeyError.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    st.error("Variável de ambiente OPENAI_API_KEY não definida.")
    st.stop()

client = OpenAI(api_key=_api_key)


@st.cache_resource
def _load_embedding_model():
    """Return the sentence-embedding model, constructed once per process.

    Streamlit re-executes this script on every user interaction; caching the
    model as a resource avoids re-creating it on each rerun.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


model = _load_embedding_model()


# ---------- Ler todos os PDFs ----------

def load_pdfs(folder="pdfs"):
    """Extract the text of every page of every PDF found directly in *folder*.

    Args:
        folder: Directory to scan (not recursive). Defaults to ``"pdfs"``.

    Returns:
        A list of stripped page texts, one entry per page that yielded
        non-blank text. Files are visited in sorted name order, so the result
        is reproducible between runs. If *folder* is not an existing
        directory, an error is shown through Streamlit and an empty list is
        returned.
    """
    textos = []

    # isdir (rather than exists) also rejects a regular file passed as folder,
    # which would otherwise make os.listdir raise.
    if not os.path.isdir(folder):
        st.error(f"Pasta '{folder}' não encontrada.")
        return textos

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for file in sorted(os.listdir(folder)):

        caminho = os.path.join(folder, file)

        # Match the extension case-insensitively (".PDF" is common) and skip
        # anything that is not a regular file, e.g. a directory named "x.pdf".
        if not file.lower().endswith(".pdf") or not os.path.isfile(caminho):
            continue

        reader = PdfReader(caminho)

        for page in reader.pages:

            txt = page.extract_text()

            # Pages with no extractable text are dropped.
            if txt and txt.strip():
                textos.append(txt.strip())

    return textos


# ---------- Criar índice vetorial ----------

@st.cache_resource
def build_index():
    """Embed the loaded PDF page texts and store them in a flat L2 FAISS index.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(index, texts)`` tuple. ``texts`` is the list returned by
        ``load_pdfs()`` and ``index`` holds one vector per entry, added in the
        same order. When no text was loaded the result is ``(None, [])``.
    """
    pages = load_pdfs()
    if len(pages) == 0:
        return None, []

    # Encode every page and cast the vectors to float32 before indexing.
    vectors = np.array(model.encode(pages)).astype("float32")

    # The index dimensionality is the width of the embedding matrix.
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)

    return flat_index, pages


index, textos = build_index()


# ---------- Interface ----------

pergunta = st.text_input("Pergunte sobre medicina:")

# Ignore empty or whitespace-only input so no pointless API call is made.
if pergunta and pergunta.strip():

    if index is None:
        st.error("Nenhum texto foi carregado dos PDFs.")
    else:

        q_embed = model.encode([pergunta])
        q_embed = np.array(q_embed).astype("float32")

        # Never ask for more neighbours than there are indexed pages: FAISS
        # pads missing results with the label -1.
        k = min(3, len(textos))
        _, ids = index.search(q_embed, k=k)

        # Explicitly reject negative labels as well; a bare `i < len(textos)`
        # check would let -1 through and silently pick the last page.
        contexto = "".join(
            textos[i] + "\n\n" for i in ids[0] if 0 <= i < len(textos)
        )

        prompt = f"""
Use apenas o conteúdo abaixo para responder.

Se a resposta não estiver no conteúdo, diga que não encontrou informação suficiente.

Conteúdo:
{contexto}

Pergunta:
{pergunta}
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}]
        )

        st.write(response.choices[0].message.content)