import os
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import pipeline
from huggingface_hub import login

# Authenticate with the Hugging Face Hub only when a token is provided.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(hf_token)

# -----------------------------
# Prompt (forces concise output)
# -----------------------------
QA_PROMPT = PromptTemplate(
    template="""Answer the following question in a short and concise way
(maximum 20 sentences), using only the information from the context below.
If you don't know the answer, just say "I can't assist you".

Context:
{context}

Question:
{question}

Concise Answer:""",
    input_variables=["context", "question"],
)

# -----------------------------
# Load and process documents
# -----------------------------
pdfs = ["ejemplo2.pdf"]
docs = []
for pdf in pdfs:
   docs.extend(PyPDFLoader(pdf).load())

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")

vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
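
# Optional: persist the FAISS index so it is not rebuilt on every start.
# The "faiss_index" directory name is only an illustrative choice:
#
#   vectorstore.save_local("faiss_index")
#   vectorstore = FAISS.load_local(
#       "faiss_index", embeddings, allow_dangerous_deserialization=True
#   )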

# Rerank the top-10 FAISS candidates with a cross-encoder and keep the best 5.
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=cross_encoder, top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever, base_compressor=compressor
)
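
# Optional sanity check for the retrieval stage; the query string is a
# placeholder, not something ejemplo2.pdf is known to contain:
#
#   for doc in compression_retriever.invoke("What is the main topic?"):
#       print(doc.metadata.get("source"), doc.page_content[:80])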

# -----------------------------
# Configure FLAN-T5 (seq2seq task)
# -----------------------------
generator = pipeline(
    "text2text-generation",        # T5 is a seq2seq model, so use this task
    model="google/flan-t5-base",
    max_new_tokens=512,            # upper bound on answer length
    do_sample=True,                # sampling must be enabled for temperature to apply
    temperature=0.5,
    repetition_penalty=1.1,
)
llm = HuggingFacePipeline(pipeline=generator)
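
# Optional smoke test of the wrapped model; the prompt is only an example:
#
#   print(llm.invoke("Answer briefly: what is retrieval-augmented generation?"))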

# -----------------------------
# RetrievalQA
# -----------------------------
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_PROMPT},
)

# -----------------------------
# FastAPI app
# -----------------------------
app = FastAPI(title="PDF QA API", description="Query PDFs with RAG + HuggingFace")

class QueryRequest(BaseModel):
    query: str

class QueryResponse(BaseModel):
    answer: str
    sources: List[str]

@app.post("/ask", response_model=QueryResponse)
def ask_question(request: QueryRequest):
    result = qa_chain.invoke({"query": request.query})
    return QueryResponse(
        answer=result["result"],
        sources=[doc.metadata.get("source", "unknown") for doc in result["source_documents"]],
    )
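
if __name__ == "__main__":
    # Run the API locally; assumes this file is saved as main.py and that
    # uvicorn is installed (pip install uvicorn). Example request:
    #
    #   curl -X POST http://127.0.0.1:8000/ask \
    #        -H "Content-Type: application/json" \
    #        -d '{"query": "What is the document about?"}'
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)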