rjelbruiz320 committed on
Commit
8f0847b
·
verified ·
1 Parent(s): 7b6c0e9

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +4 -4
utils.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
8
  #Load .env file
9
  load_dotenv()
10
 
11
- #Get model name from environment (default if not found)
12
  MODEL_NAME = os.getenv("MODEL_NAME", "google/flan-t5-small")
13
 
14
  def load_handbook_text(pdf_path):
@@ -33,14 +33,14 @@ def answer_question(query, pdf_path="STUDENT-HANDBOOK-2021-EDITION.pdf"):
33
  chunks = split_text(text)
34
  context = find_relevant_chunk(query, chunks)
35
 
36
- # Load the model (cached automatically by Hugging Face)
37
  qa = pipeline(
38
  "text2text-generation",
39
  model=MODEL_NAME,
40
  tokenizer=MODEL_NAME
41
  )
42
 
43
- # Improved prompt for more precise answers
44
  prompt = (
45
  f"Use only the context below to answer concisely and clearly.\n\n"
46
  f"Question: {query}\n\n"
@@ -50,6 +50,6 @@ def answer_question(query, pdf_path="STUDENT-HANDBOOK-2021-EDITION.pdf"):
50
 
51
  result = qa(prompt, max_new_tokens=100, temperature=0.3, num_beams=4)
52
 
53
- # Clean up output (trim redundant text)
54
  answer = result[0]["generated_text"].strip()
55
  return answer
 
8
  #Load .env file
9
  load_dotenv()
10
 
11
+ #Getting the model name from environment (default if not found)
12
  MODEL_NAME = os.getenv("MODEL_NAME", "google/flan-t5-small")
13
 
14
  def load_handbook_text(pdf_path):
 
33
  chunks = split_text(text)
34
  context = find_relevant_chunk(query, chunks)
35
 
36
+ #Loading the model, which is cached automatically by Hugging Face.
37
  qa = pipeline(
38
  "text2text-generation",
39
  model=MODEL_NAME,
40
  tokenizer=MODEL_NAME
41
  )
42
 
43
+ #Improved prompts for more precise answers.
44
  prompt = (
45
  f"Use only the context below to answer concisely and clearly.\n\n"
46
  f"Question: {query}\n\n"
 
50
 
51
  result = qa(prompt, max_new_tokens=100, temperature=0.3, num_beams=4)
52
 
53
+ #Cleaning up the output by trimming the redundant text
54
  answer = result[0]["generated_text"].strip()
55
  return answer