João Lima committed
Commit ca69070 · 1 Parent(s): ba96169

fixing stuffs

Files changed (6)
  1. app.py +42 -4
  2. evaluation/metrics.py +27 -17
  3. ingestion/pdf.py +12 -6
  4. rag/llm.py +35 -6
  5. rag/pipeline.py +9 -7
  6. requirements.txt +5 -9
app.py CHANGED
@@ -1,7 +1,45 @@
 import gradio as gr
+from ingestion.pdf import process_pdf
+from rag.pipeline import run_rag
 
-def echo(text):
-    return text
+vectorstore = None
 
-demo = gr.Interface(fn=echo, inputs="text", outputs="text")
-demo.launch()
+def load_document(file):
+    global vectorstore
+    if file is None:
+        return "Please upload a PDF file."
+    try:
+        vectorstore = process_pdf(file.name)
+        return "Document processed successfully."
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+def ask(question):
+    if vectorstore is None:
+        return "Upload a document first", "", ""
+    if not question.strip():
+        return "Please enter a question", "", ""
+    try:
+        return run_rag(question, vectorstore)
+    except Exception as e:
+        return f"Error: {str(e)}", "", ""
+
+with gr.Blocks(title="Tech Explainer RAG") as demo:
+    gr.Markdown("# Tech Explainer — RAG with Automatic Evaluation")
+
+    file = gr.File(label="Upload PDF", file_types=[".pdf"])
+    load_btn = gr.Button("Process PDF")
+    status = gr.Textbox(label="Status")
+
+    question = gr.Textbox(label="Question", placeholder="Ask a question about the document...")
+    ask_btn = gr.Button("Ask")
+
+    answer = gr.Textbox(label="Answer", lines=5)
+    sources = gr.Textbox(label="Sources", lines=2)
+    evaluation = gr.Textbox(label="Evaluation", lines=3)
+
+    load_btn.click(load_document, inputs=file, outputs=status)
+    ask_btn.click(ask, inputs=question, outputs=[answer, sources, evaluation])
+
+if __name__ == "__main__":
+    demo.launch()
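
Since the new handlers are plain functions, the wiring can be smoke-tested without launching the UI. A minimal sketch, assuming a local sample.pdf (hypothetical path; importing app also loads the LLM, so the model must be available):

    # Sketch: exercise the Gradio callbacks directly. FakeUpload mimics the
    # upload object gr.File passes to load_document (only .name is read).
    from app import load_document, ask

    class FakeUpload:
        name = "sample.pdf"  # hypothetical path, not part of this commit

    print(load_document(FakeUpload()))          # "Document processed successfully."
    print(ask("What is this document about?"))  # -> (answer, sources, evaluation)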
evaluation/metrics.py CHANGED
@@ -1,33 +1,43 @@
 import json
+import os
 from datetime import datetime
 from rag.llm import generate
 
-
 LOG_PATH = "logs/metrics.jsonl"
 
 
+def ensure_log_dir():
+    """Create the logs directory if it does not exist."""
+    os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
+
+
 def evaluate_and_log(question, context, answer):
+    ensure_log_dir()
+
     prompt = (
-        "Evaluate the answer based on faithfulness, clarity, usefulness and completeness. "
-        "Provide a short justification.\n"
-        f"Context:\n{context}\n"
-        f"Answer:\n{answer}"
+        "Evaluate the answer based on faithfulness, clarity, usefulness and completeness. "
+        "Provide a short justification.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Answer:\n{answer}\n\n"
+        "Evaluation:"
     )
 
-
-    evaluation = generate(prompt, max_tokens=200)
-
+    try:
+        evaluation = generate(prompt, max_tokens=200)
+    except Exception as e:
+        evaluation = f"Evaluation failed: {str(e)}"
 
     record = {
-        "timestamp": datetime.utcnow().isoformat(),
-        "question": question,
-        "answer": answer,
-        "evaluation": evaluation
+        "timestamp": datetime.utcnow().isoformat(),
+        "question": question,
+        "answer": answer,
+        "evaluation": evaluation
     }
 
-
-    with open(LOG_PATH, "a") as f:
-        f.write(json.dumps(record) + "\n")
-
-
-    return evaluation
+    try:
+        with open(LOG_PATH, "a", encoding="utf-8") as f:
+            f.write(json.dumps(record, ensure_ascii=False) + "\n")
+    except Exception as e:
+        print(f"Warning: Could not write to log file: {e}")
+
+    return evaluation
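
Each evaluation is appended as one JSON object per line, so the log is easy to inspect after a few queries. A minimal sketch, assuming logs/metrics.jsonl exists and follows the record shape above:

    # Sketch: read the JSONL metrics log back for review
    import json

    with open("logs/metrics.jsonl", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    for r in records[-5:]:  # five most recent evaluations
        print(r["timestamp"], "|", r["question"], "->", r["evaluation"][:80])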
 
 
 
 
ingestion/pdf.py CHANGED
@@ -1,16 +1,22 @@
 from pypdf import PdfReader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import Document
 from rag.retriever import build_vectorstore
 
 
-def process_pdf(file):
-    reader = PdfReader(file.name)
+def process_pdf(file_path):
+    """
+    file_path: string - path to the PDF file
+    """
+    reader = PdfReader(file_path)
     text = "".join(page.extract_text() or "" for page in reader.pages)
 
-    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100
+    )
 
     chunks = splitter.split_text(text)
    documents = [Document(page_content=c) for c in chunks]
 
-    return build_vectorstore(documents)
+    return build_vectorstore(documents)
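
rag/retriever.py is not part of this commit, so build_vectorstore appears nowhere here. A plausible sketch consistent with the dependencies in requirements.txt (FAISS via langchain-community, sentence-transformers embeddings); the embedding model name is a placeholder assumption:

    # Hypothetical sketch of rag/retriever.py (not in this commit)
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    def build_vectorstore(documents):
        # assumed embedding model; any sentence-transformers model would do
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        return FAISS.from_documents(documents, embeddings)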
rag/llm.py CHANGED
@@ -6,23 +6,52 @@ from config import PRIMARY_LLM, FALLBACK_LLM
 def load_model():
     try:
         tokenizer = AutoTokenizer.from_pretrained(PRIMARY_LLM)
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
         model = AutoModelForCausalLM.from_pretrained(
-            PRIMARY_LLM, device_map="auto"
+            PRIMARY_LLM,
+            device_map="auto",
+            load_in_8bit=True
         )
-    except Exception:
+        print(f"Loaded primary model: {PRIMARY_LLM}")
+    except Exception as e:
+        print(f"Primary model failed: {e}")
+        print(f"Loading fallback: {FALLBACK_LLM}")
+
         tokenizer = AutoTokenizer.from_pretrained(FALLBACK_LLM, trust_remote_code=True)
-        model = AutoModelForCausalLM.from_pretrained(FALLBACK_LLM)
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        model = AutoModelForCausalLM.from_pretrained(
+            FALLBACK_LLM,
+            trust_remote_code=True,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
 
     return tokenizer, model
 
+
 tokenizer, model = load_model()
 
 
 def generate(prompt, max_tokens=400):
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
     with torch.no_grad():
-        output = model.generate(**inputs, max_new_tokens=max_tokens)
+        output = model.generate(
+            **inputs,
+            max_new_tokens=max_tokens,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9
+        )
 
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    if prompt in generated_text:
+        generated_text = generated_text.replace(prompt, "").strip()
+
+    return generated_text
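
Passing load_in_8bit directly to from_pretrained requires bitsandbytes and accelerate (both added to requirements.txt below) and is deprecated in recent transformers releases in favor of a quantization config object. A minimal sketch of the equivalent primary-model load, using PRIMARY_LLM from config as above:

    # Sketch: BitsAndBytesConfig form of the 8-bit primary-model load
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        PRIMARY_LLM,
        device_map="auto",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    )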
rag/pipeline.py CHANGED
@@ -1,19 +1,21 @@
 from rag.llm import generate
 from evaluation.metrics import evaluate_and_log
 
+
 def run_rag(question, vectorstore):
     docs = vectorstore.similarity_search(question, k=3)
-    context = "\n".join(d.page_content for d in docs)
-
+    context = "\n\n".join(d.page_content for d in docs)
 
     prompt = (
-        "Use the context below to answer the question clearly and simply.\n"
-        f"Context:\n{context}\n"
-        f"Question: {question}"
+        "Use the context below to answer the question clearly and simply.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Question: {question}\n\n"
+        "Answer:"
     )
 
-
     answer = generate(prompt)
     evaluation = evaluate_and_log(question, context, answer)
 
-    return answer, "Sources retrieved from document", evaluation
+    sources = f"Retrieved {len(docs)} relevant passages from document"
+
+    return answer, sources, evaluation
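
For reference, the full pipeline can now be driven without the UI. A minimal sketch of the return contract; "sample.pdf" is a hypothetical path:

    # Sketch: end-to-end usage of the pipeline outside Gradio
    from ingestion.pdf import process_pdf
    from rag.pipeline import run_rag

    vs = process_pdf("sample.pdf")  # hypothetical input file
    answer, sources, evaluation = run_rag("What problem does this document address?", vs)
    print(answer)
    print(sources)     # e.g. "Retrieved 3 relevant passages from document"
    print(evaluation)  # LLM self-evaluation, also appended to logs/metrics.jsonl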
requirements.txt CHANGED
@@ -1,14 +1,10 @@
-gradio
-gradio_client>=0.8.0
+gradio==4.44.0
 transformers
 sentence-transformers
-torch
-pypdf
-
 langchain
-langchain-core
 langchain-community
-langchain-text-splitters
-langchain-huggingface
-
 faiss-cpu
+pypdf
+torch
+accelerate
+bitsandbytes
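
accelerate and bitsandbytes back the device_map="auto" and load_in_8bit paths in rag/llm.py. For completeness, config.py, which rag/llm.py imports but which is absent from this commit, presumably looks like the sketch below; the model ids are hypothetical placeholders, not taken from the repository:

    # Hypothetical sketch of config.py (not in this commit)
    PRIMARY_LLM = "mistralai/Mistral-7B-Instruct-v0.2"  # placeholder id
    FALLBACK_LLM = "Qwen/Qwen-1_8B-Chat"  # placeholder; trust_remote_code
                                          # suggests a custom-code model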