# PDF Question-Answering API — RAG over local PDFs using LangChain + HuggingFace.
# (Deployed as a HuggingFace Space; non-code page residue removed from this file.)
import os
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import pipeline
from huggingface_hub import login
# Authenticate with the Hugging Face Hub. The token must be supplied via the
# HF_TOKEN environment variable; fail fast with a clear message instead of a
# bare KeyError so misconfigured deployments are easy to diagnose.
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    raise RuntimeError("HF_TOKEN environment variable is not set")
login(_hf_token)
# -----------------------------
# Prompt (forces concise output)
# -----------------------------
# Prompt template injected into the RetrievalQA chain below; it receives the
# reranked document chunks as {context} and the user's question as {question}.
# NOTE(review): the template text (including the "I cant" typo and the
# "maximum 20 sentences" limit) is live model input — any wording change
# changes runtime behavior, so it is left byte-identical here.
QA_PROMPT = PromptTemplate(
    template="""Answer the following question in a short and concise way
(maximum 20 sentences), using only the information from the context below.
If you don’t know the answer, just say "I cant assist you".
Context:
{context}
Question:
{question}
Concise Answer:""",
    input_variables=["context", "question"],
)
# -----------------------------
# Load and process documents
# -----------------------------
# PDF paths come from the PDF_FILES env var (comma-separated); the default
# preserves the original hard-coded file so existing deployments keep working.
pdfs = [p.strip() for p in os.environ.get("PDF_FILES", "ejemplo2.pdf").split(",") if p.strip()]
docs = []
for pdf in pdfs:
    # load() returns one Document per page; accumulate across all PDFs
    docs.extend(PyPDFLoader(pdf).load())
# 1000-char chunks with 200-char overlap so answers spanning chunk
# boundaries are still retrievable.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Small/fast sentence-transformer embedding model for the FAISS index.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
vectorstore = FAISS.from_documents(splits, embeddings)
# Over-retrieve (k=10) then let the cross-encoder reranker keep the best 5.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=cross_encoder, top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever, base_compressor=compressor
)
# -----------------------------
# Configure FLAN-T5 (better task)
# -----------------------------
generator = pipeline(
    "text2text-generation",  # T5-family models are seq2seq, not causal LM
    model="google/flan-t5-base",
    max_new_tokens=512,       # upper bound on answer length
    do_sample=True,           # temperature is only applied when sampling is on
    temperature=0.7,          # was 5 — that high a value yields near-random text
    repetition_penalty=1.1,   # mild penalty to curb T5's repetition habit
)
# Wrap the transformers pipeline so LangChain chains can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=generator)
# -----------------------------
# RetrievalQA
# -----------------------------
# End-to-end RAG chain: reranked retrieval -> QA_PROMPT -> FLAN-T5 generation.
# return_source_documents=True makes the originating chunks available to the
# API response for citation.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_PROMPT}
)
# -----------------------------
# FastAPI app
# -----------------------------
app = FastAPI(title="PDF QA API", description="Query PDFs with RAG + HuggingFace")
class QueryRequest(BaseModel):
    """Request body for POST /ask."""
    query: str  # the natural-language question to answer from the PDFs
class QueryResponse(BaseModel):
    """Response body for POST /ask."""
    answer: str  # generated answer text
    sources: List[str]  # source file path per supporting document chunk
@app.post("/ask", response_model=QueryResponse)
def ask_question(request: QueryRequest):
    """Answer a question against the indexed PDFs via the RAG chain.

    Invokes the module-level RetrievalQA chain and maps its output onto the
    API schema, reporting each supporting chunk's source file ("unknown"
    when the loader recorded no source metadata).
    """
    rag_output = qa_chain.invoke({"query": request.query})
    source_names = []
    for document in rag_output["source_documents"]:
        source_names.append(document.metadata.get("source", "unknown"))
    return QueryResponse(answer=rag_output["result"], sources=source_names)