Rauhan committed on
Commit
ad329be
·
1 Parent(s): 150d369

UPDATE: prompts

Browse files
config.ini CHANGED
@@ -4,10 +4,6 @@ BASEURL = https://api.groq.com/openai/v1
4
  [DETAIL EXTRACTOR]
5
  VLM = meta-llama/llama-4-maverick-17b-128e-instruct
6
  BATCHSIZE = 5
7
- MAXTOKENS = 1024
8
- TEMPERATURE = 0.5
9
 
10
  [SUMMARIZER]
11
- LLM = meta-llama/llama-4-maverick-17b-128e-instruct
12
- MAXTOKENS = 4096
13
- TEMPERATURE = 1
 
4
  [DETAIL EXTRACTOR]
5
  VLM = meta-llama/llama-4-maverick-17b-128e-instruct
6
  BATCHSIZE = 5
 
 
7
 
8
  [SUMMARIZER]
9
+ LLM = meta-llama/llama-4-maverick-17b-128e-instruct
 
 
src/components/extractPdfDetails.py CHANGED
@@ -68,10 +68,7 @@ class ExtractPdfDetails:
68
  messages = [
69
  {"role": "system", "content": self.prompts["detailExtractorPrompt"]},
70
  {"role": "user", "content": [{"type": "image_url", "image_url": {"url": convertImageToBase64(image)}} for image in images]}
71
- ],
72
- # temperature = self.config.getfloat("DETAIL EXTRACTOR", "TEMPERATURE"),
73
- # max_tokens = self.config.getint("DETAIL EXTRACTOR", "MAXTOKENS"),
74
- # stream = False
75
  )
76
  response = completion.choices[0].message.content
77
  return response
 
68
  messages = [
69
  {"role": "system", "content": self.prompts["detailExtractorPrompt"]},
70
  {"role": "user", "content": [{"type": "image_url", "image_url": {"url": convertImageToBase64(image)}} for image in images]}
71
+ ]
 
 
 
72
  )
73
  response = completion.choices[0].message.content
74
  return response
src/components/summaryEngine.py CHANGED
@@ -19,17 +19,20 @@ class SummaryEngine:
19
  """
20
  try:
21
  logger.info("Summarizing the details extracted from the images")
22
- allSummaries = "\n".join(texts)
 
 
 
 
 
23
  completion = litellm.completion(
24
  model = self.config.get("SUMMARIZER", "LLM"),
25
  api_key = os.environ["GROQ_API_KEY"],
26
- api_base = self.config["GROQ CONFIG"]["BASEURL"],
27
  messages = [
28
  {"role": "system", "content": self.prompts["summaryEnginePrompt"]},
29
  {"role": "user", "content": f"AGGEREGATED SUMMARIES: {allSummaries}"}
30
- ],
31
- # max_tokens = self.config.getint("SUMMARIZER", "MAXTOKENS"),
32
- # temperature = self.config.getfloat("SUMMARIZER", "TEMPERATURE")
33
  )
34
  response = completion["choices"][0]["message"]["content"]
35
  logger.info("Summary generated successfully")
 
19
  """
20
  try:
21
  logger.info("Summarizing the details extracted from the images")
22
+ allSummaries = str()
23
+ for i in range(len(texts)):
24
+ batchSize = self.config.getint("DETAIL EXTRACTOR", "BATCHSIZE")
25
+ startPage = i * batchSize + 1
26
+ endPage = startPage + batchSize - 1
27
+ allSummaries += f"# SUMMARY FOR PAGES {startPage}-{endPage}:\n" + texts[i]
28
  completion = litellm.completion(
29
  model = self.config.get("SUMMARIZER", "LLM"),
30
  api_key = os.environ["GROQ_API_KEY"],
31
+ api_base = self.config.get("GROQ CONFIG", "BASEURL"),
32
  messages = [
33
  {"role": "system", "content": self.prompts["summaryEnginePrompt"]},
34
  {"role": "user", "content": f"AGGEREGATED SUMMARIES: {allSummaries}"}
35
+ ]
 
 
36
  )
37
  response = completion["choices"][0]["message"]["content"]
38
  logger.info("Summary generated successfully")
src/utils/functions.py CHANGED
@@ -25,7 +25,7 @@ def convertImageToBase64(image: Image.Image) -> str:
25
  imageString: base64 uri of the image
26
  """
27
  buffered = BytesIO()
28
- image.save(buffered, format="JPEG", optimize=True, quality=85)
29
  imageBytes = buffered.getvalue()
30
  imageBase64 = base64.b64encode(imageBytes).decode("utf-8")
31
  dataUri = f"data:image/jpeg;base64,{imageBase64}"
 
25
  imageString: base64 uri of the image
26
  """
27
  buffered = BytesIO()
28
+ image.save(buffered, format = "JPEG", optimize = True, quality = 85)
29
  imageBytes = buffered.getvalue()
30
  imageBase64 = base64.b64encode(imageBytes).decode("utf-8")
31
  dataUri = f"data:image/jpeg;base64,{imageBase64}"