Syrgak33 committed on
Commit
62911ea
·
verified ·
1 Parent(s): 8e24e1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -7
app.py CHANGED
@@ -3,6 +3,12 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -53,14 +59,78 @@ class SimplifyAnswerTool(Tool):
53
 
54
  # --- Basic Agent Definition ---
55
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
56
class BasicAgent:
    """Placeholder agent that replies to every question with one canned string."""

    # The single reply this agent ever produces.
    _DEFAULT_ANSWER = "This is a default answer."

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a preview of *question* and return the canned answer.

        Args:
            question: The question text; only its first 50 characters are
                echoed to stdout.

        Returns:
            The fixed default answer, regardless of the question.
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        reply = self._DEFAULT_ANSWER
        print(f"Agent returning fixed answer: {reply}")
        return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def run_and_submit_all( profile: gr.OAuthProfile | None):
66
  """
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,Tool
7
+ from transformers import load_tool, HfAgent
8
+ import pdfplumber
9
+ import pytesseract
10
+ from PIL import Image
11
+ from io import BytesIO
12
 
13
  # (Keep Constants as is)
14
  # --- Constants ---
 
59
 
60
  # --- Basic Agent Definition ---
61
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
62
def extract_text_from_pdf(pdf_url: str, timeout: float = 30.0) -> str:
    """Download a PDF and return the concatenated text of its pages.

    This is a best-effort helper: any failure (network error, HTTP error
    status, unparsable PDF) is printed and reported as an empty string so the
    caller can continue without the PDF content.

    Args:
        pdf_url: URL of the PDF document to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block indefinitely on an
            unresponsive server.

    Returns:
        The page texts joined with newlines (pages with no extractable text
        contribute an empty string), or ``""`` if anything went wrong.
    """
    try:
        response = requests.get(pdf_url, timeout=timeout)
        response.raise_for_status()
        with pdfplumber.open(BytesIO(response.content)) as pdf:
            # extract_text() may return None for a page, hence the `or ""`.
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    except Exception as e:
        # Deliberately broad: callers treat "" as "no PDF text available".
        print(f"Error extracting PDF text: {e}")
        return ""
72
+
73
+ # Helper to extract text from image url using OCR
74
def extract_text_from_image(image_url: str, timeout: float = 30.0) -> str:
    """Download an image and return the text found in it via OCR.

    This is a best-effort helper: any failure (network error, HTTP error
    status, undecodable image, OCR failure) is printed and reported as an
    empty string.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block indefinitely on an
            unresponsive server.

    Returns:
        The OCR output with surrounding whitespace stripped, or ``""`` if
        anything went wrong.
    """
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
        # Context manager releases the image's resources once OCR is done.
        with Image.open(BytesIO(response.content)) as img:
            text = pytesseract.image_to_string(img)
        return text.strip()
    except Exception as e:
        # Deliberately broad: callers treat "" as "no image text available".
        print(f"Error extracting image text: {e}")
        return ""
84
+
85
+ class OpalAgent:
86
  def __init__(self):
87
+ # Use a small, fast model to save tokens/costs
88
+ model_name = "tiiuae/falcon-7b-instruct" # Falcon-7B-Instruct is open and efficient
89
+
90
+ self.llm = HfApiModel(model_name, token=HF_TOKEN)
91
+
92
+ # Load tools - search, QA, image captioning
93
+ self.tools = [
94
+ DuckDuckGoSearchTool(),
95
+ load_tool("question-answering"), # for text QA
96
+ load_tool("image-captioning"), # for images
97
+ load_tool("image-question-answering")# for image QA
98
+ ]
99
+
100
+ # Add answer simplification tool
101
+ self.simplify_tool = SimplifyAnswerTool(self.llm)
102
+
103
+ self.agent = CodeAgent(
104
+ llm=self.llm,
105
+ tools=self.tools + [self.simplify_tool],
106
+ verbose=False,
107
+ )
108
+
109
+ def __call__(self, question: str, extra_data: dict = None) -> str:
110
+ context = question
111
+
112
+ # If PDF or image in extra data, preprocess and add to context
113
+ if extra_data:
114
+ if "pdf_url" in extra_data:
115
+ pdf_text = extract_text_from_pdf(extra_data["pdf_url"])
116
+ context += f"\n\n[PDF CONTENT]: {pdf_text[:1000]}" # limit length
117
+
118
+ if "image_url" in extra_data:
119
+ # Use OCR text to add context or just mention image URL
120
+ img_text = extract_text_from_image(extra_data["image_url"])
121
+ if img_text:
122
+ context += f"\n\n[IMAGE TEXT]: {img_text}"
123
+ else:
124
+ context += f"\n\n[IMAGE URL]: {extra_data['image_url']}"
125
+
126
+ try:
127
+ raw_answer = self.agent(context)
128
+ # Simplify final answer
129
+ simple_answer = self.simplify_tool(question=question, answer=raw_answer)
130
+ return simple_answer.strip()
131
+ except Exception as e:
132
+ return f"AGENT ERROR: {e}"
133
+
134
 
135
  def run_and_submit_all( profile: gr.OAuthProfile | None):
136
  """