kpbotla committed on
Commit
87ea1cd
·
verified ·
1 Parent(s): fc7a0be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -80
app.py CHANGED
@@ -16,6 +16,8 @@ from newspaper import Article
16
  from duckduckgo_search import DDGS
17
  from transformers import pipeline
18
  import logging
 
 
19
 
20
 
21
  from PIL import Image
@@ -33,103 +35,82 @@ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
33
 
34
 
35
 
36
class SmartResearchAgent:
    """Agent that searches the web, summarizes text/URLs, builds citations and captions images.

    Calling the instance routes a question to one of the tools:

    * an ``image`` argument is captioned with BLIP;
    * ``search: <query>`` returns the top DuckDuckGo hits;
    * ``summarize: <text or url>`` returns a BART summary;
    * ``generate citation: <url>`` / ``cite: <url>`` returns a BibTeX-style entry;
    * anything else is searched and the first hit is summarized.

    Every tool reports failures as a short human-readable string instead of
    raising, so the caller always receives a ``str``.
    """

    def __init__(self):
        """Load the summarization pipeline and the BLIP captioning model."""
        logging.info("Initializing SmartResearchAgent")
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    def search_web(self, query: str) -> str:
        """Return up to three search hits, one per line, as ``"N. title: url"``.

        Returns ``"No results found."`` when the search yields nothing and
        ``"Error during web search."`` when the search raises.
        """
        try:
            with DDGS() as ddgs:
                # Consume the results inside the ``with`` block: the session
                # may be closed once the context manager exits.
                results = ddgs.text(query, max_results=3) or []
                top = [f"{i+1}. {r['title']}: {r['href']}" for i, r in enumerate(results)]
            return "\n".join(top) if top else "No results found."
        except Exception as e:
            logging.error(f"Search error: {e}")
            return "Error during web search."

    def summarize(self, input_text_or_url: str) -> str:
        """Summarize raw text, or the article behind a URL.

        Input starting with ``"http"`` is downloaded and parsed with
        ``newspaper.Article`` first. Returns ``"No content to summarize."``
        for blank input and ``"Error during summarization."`` on any failure.
        """
        try:
            if input_text_or_url.startswith("http"):
                article = Article(input_text_or_url)
                article.download()
                article.parse()
                input_text_or_url = article.text
            if not input_text_or_url.strip():
                return "No content to summarize."
            # truncation=True: long articles can exceed the model's maximum
            # input length, which would otherwise make the pipeline fail.
            summary = self.summarizer(
                input_text_or_url,
                max_length=160,
                min_length=40,
                do_sample=False,
                truncation=True,
            )
            return summary[0]['summary_text'].strip()
        except Exception as e:
            logging.error(f"Summarization error: {e}")
            return "Error during summarization."

    def generate_citation(self, url: str) -> str:
        """Return a BibTeX-style ``@article`` entry for ``url``.

        The citation key is ``cite`` plus the first six hex digits of the
        URL's MD5 digest (used as a stable identifier, not for security);
        the year is the current year.
        """
        try:
            citation_id = hashlib.md5(url.encode()).hexdigest()[:6]
            year = datetime.datetime.now().year
            citation = (
                f"@article{{cite{citation_id},\n"
                f" title={{Generated Reference}},\n"
                f" author={{Unknown}},\n"
                f" journal={{Online}},\n"
                f" year={{ {year} }},\n"
                f" url={{ {url} }}\n"
                f"}}"
            )
            return citation
        except Exception as e:
            logging.error(f"Citation error: {e}")
            return "Error during citation generation."

    def _caption(self, image) -> str:
        """Run BLIP on an already-loaded RGB image and format the caption."""
        inputs = self.blip_processor(image, return_tensors="pt")
        out = self.blip_model.generate(**inputs)
        caption = self.blip_processor.decode(out[0], skip_special_tokens=True)
        return f"Image analysis: {caption}"

    def caption_image(self, image_path: str) -> str:
        """Caption the image stored at ``image_path``.

        Returns ``"Unable to process the image."`` if the file cannot be
        opened or the model fails.
        """
        try:
            # Context manager closes the underlying file handle promptly.
            with Image.open(image_path) as img:
                return self._caption(img.convert("RGB"))
        except Exception as e:
            logging.error(f"Image processing error: {e}")
            return "Unable to process the image."

    @staticmethod
    def _first_url(search_result: str):
        """Return the URL of the first ``"N. title: url"`` line, or ``None``.

        The split is taken from the right because titles may themselves
        contain ``": "`` while the trailing URL does not.
        """
        for line in search_result.splitlines():
            if "http" in line:
                return line.rsplit(": ", 1)[-1]
        return None

    # The annotation is quoted so that defining the class does not require
    # ``Image`` to be resolvable at definition time.
    def __call__(self, question: str, image: "Image.Image | None" = None) -> str:
        """Answer ``question``, or caption ``image`` when one is supplied.

        Returns the tool's output, or ``"Agent error: <exc>"`` if routing
        itself fails unexpectedly.
        """
        logging.info(f"Received question: {question}")

        try:
            # Image input takes precedence over any text command.
            if image is not None:
                logging.info("Image input detected")
                try:
                    # Caption the in-memory image directly. Writing it to a
                    # fixed /tmp JPEG path first fails for RGBA/palette images
                    # and lets concurrent calls overwrite each other's file.
                    return self._caption(image.convert("RGB"))
                except Exception as e:
                    logging.error(f"Image processing error: {e}")
                    return "Unable to process the image."

            q_lower = question.lower().strip()

            # Explicit text commands.
            if q_lower.startswith("search:"):
                query = question.split(":", 1)[1].strip()
                return self.search_web(query)
            elif q_lower.startswith("summarize:"):
                target = question.split(":", 1)[1].strip()
                return self.summarize(target)
            elif q_lower.startswith("generate citation:") or q_lower.startswith("cite:"):
                url = question.split(":", 1)[1].strip()
                return self.generate_citation(url)
            else:
                # Default: search, then summarize the first hit.
                search_result = self.search_web(question)
                first_url = self._first_url(search_result)
                if first_url:
                    summary = self.summarize(first_url)
                    return f"{summary}\n\nSource: {first_url}"
                else:
                    return "Sorry, I couldn't find relevant information."
        except Exception as e:
            logging.exception("Unhandled error in agent call")
            return f"Agent error: {e}"
132
-
133
 
134
 
135
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
16
  from duckduckgo_search import DDGS
17
  from transformers import pipeline
18
  import logging
19
+ import whisper
20
+ from bs4 import BeautifulSoup
21
 
22
 
23
  from PIL import Image
 
35
 
36
 
37
 
38
+
39
+
40
class SmartAgentV2:
    """Text-first agent combining QA, summarization, search, citation and file tools.

    Calling the instance routes a question as follows:

    1. free-form questions that mention images or videos are declined;
    2. an attached ``.mp3`` file is transcribed, an ``.xlsx``/``.xls`` file
       has its food sales summed;
    3. ``summarize:``, ``generate citation:``/``cite:`` and ``search:``
       prefixes select the matching tool;
    4. anything else is answered by the text2text QA model.
    """

    # Prefixes that explicitly select a tool. Questions starting with one of
    # these skip the image/video keyword refusal, so a URL or query that
    # merely contains e.g. "video" is still processed.
    _COMMAND_PREFIXES = ("summarize:", "generate citation:", "cite:", "search:")

    def __init__(self):
        """Load the QA, summarization and Whisper speech-to-text models."""
        self.qa_model = pipeline("text2text-generation", model="google/flan-t5-base")
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.whisper_model = whisper.load_model("base")

    def search_web(self, query):
        """Return the URL of the first search hit, or ``"No results found."``."""
        with DDGS() as ddgs:
            # ``or []`` guards against a search call that yields None.
            results = ddgs.text(query, max_results=3) or []
            for r in results:
                if "href" in r:
                    return r["href"]
        return "No results found."

    def summarize_url(self, url):
        """Download the article at ``url`` and return a summary of its text.

        Returns ``"No content found."`` when the article has no text and
        ``"Error summarizing."`` on any failure.
        """
        try:
            article = Article(url)
            article.download()
            article.parse()
            text = article.text
            if not text.strip():
                return "No content found."
            # truncation=True: long articles can exceed the model's maximum
            # input length, which would otherwise make the pipeline fail.
            summary = self.summarizer(
                text,
                max_length=150,
                min_length=40,
                do_sample=False,
                truncation=True,
            )
            return summary[0]['summary_text'].strip()
        except Exception as e:
            logging.error(f"Summarization error: {e}")
            return "Error summarizing."

    def generate_citation(self, url):
        """Return a one-line BibTeX-style ``@article`` entry for ``url``.

        The key is ``cite`` plus the first six hex digits of the URL's MD5
        digest (a stable identifier, not a security measure).
        """
        citation_id = hashlib.md5(url.encode()).hexdigest()[:6]
        year = datetime.datetime.now().year
        return f"@article{{cite{citation_id}, title={{Generated Citation}}, author={{Unknown}}, journal={{Online}}, year={{ {year} }}, url={{ {url} }} }}"

    def transcribe_audio(self, filepath):
        """Transcribe the audio file at ``filepath`` with Whisper and return the text."""
        result = self.whisper_model.transcribe(filepath)
        return result["text"]

    def sum_food_sales(self, filepath):
        """Sum the ``Sales`` column over rows whose ``Category`` is "food".

        The category match is case-insensitive. The total is returned as a
        dollar string with two decimals, e.g. ``"$12.50"``.
        """
        df = pd.read_excel(filepath)
        food_df = df[df["Category"].str.lower() == "food"]
        total = food_df["Sales"].sum()
        return f"${total:.2f}"

    def answer_fact(self, question):
        """Answer a free-form question with the text2text QA model."""
        return self.qa_model(question, max_length=100)[0]["generated_text"].strip()

    def __call__(self, question: str, file=None):
        """Route ``question`` (and optional ``file``) to the matching tool.

        ``file`` may be a path string or an object exposing the path as
        ``.name``. Returns the tool's answer, or a generic error message if
        any step raises.
        """
        try:
            # Normalise inside the ``try`` so a None question yields the
            # generic error message rather than an uncaught AttributeError.
            q = (question or "").lower().strip()

            # Keyword refusals apply to free-form questions only; an explicit
            # command must not be declined because its argument happens to
            # contain "image" or "video".
            if not q.startswith(self._COMMAND_PREFIXES):
                if any(word in q for word in ("image", "chess", "diagram")):
                    return "I'm a text-only agent and cannot interpret images."
                if any(word in q for word in ("youtube", "video")):
                    return "I'm unable to access or analyze video/audio from YouTube."

            if file:
                filepath = file if isinstance(file, str) else getattr(file, "name", None)
                if filepath:
                    # Compare extensions case-insensitively (e.g. ".MP3").
                    lowered = filepath.lower()
                    if lowered.endswith(".mp3"):
                        return self.transcribe_audio(filepath)
                    if lowered.endswith((".xlsx", ".xls")):
                        return self.sum_food_sales(filepath)

            if q.startswith("summarize:"):
                url = question.split(":", 1)[1].strip()
                return self.summarize_url(url)
            elif q.startswith(("generate citation:", "cite:")):
                url = question.split(":", 1)[1].strip()
                return self.generate_citation(url)
            elif q.startswith("search:"):
                query = question.split(":", 1)[1].strip()
                return self.search_web(query)
            else:
                return self.answer_fact(question)
        except Exception as e:
            # logging.exception keeps the traceback alongside the message.
            logging.exception(f"Error: {e}")
            return "An error occurred processing the question."
 
114
 
115
 
116
  def run_and_submit_all( profile: gr.OAuthProfile | None):