SanketAI committed on
Commit
ad9a571
·
verified ·
1 Parent(s): 19e1445

Update agents/summarization_agent.py

Browse files
Files changed (1) hide show
  1. agents/summarization_agent.py +103 -108
agents/summarization_agent.py CHANGED
@@ -1,109 +1,104 @@
1
- from langchain.vectorstores import FAISS
2
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
3
- import os
4
- import streamlit as st
5
- from agents import SearchAgent
6
- from config.config import model
7
-
8
-
9
-
10
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
11
-
12
- class SummarizationAgent:
13
- def __init__(self):
14
- self.model = model
15
- self.prompt = """You are a research assistant tasked with synthesizing findings from multiple academic papers over time. Your goal is to create a comprehensive summary that highlights key trends, thematic developments, and methodological evolution within a given timeframe.
16
-
17
- Given the following context, analyze the papers to produce a structured summary:
18
-
19
- Previous conversation:
20
- {chat_history}
21
-
22
- Papers context:
23
- {context}
24
-
25
- Guidelines for timeline-based summarization:
26
-
27
- Key Findings and Trends Over Time
28
-
29
- Identify major discoveries and conclusions, highlighting how they have developed chronologically.
30
- Note emerging trends, consensus, and any evolving contradictions across papers, especially in response to new technologies or shifts in the field.
31
- Present statistical evidence and experimental results in relation to time, pointing out any measurable improvements or declines over the years.
32
- Methodological Evolution
33
-
34
- Compare and contrast research approaches across different time periods, emphasizing changes or advances in data collection, analysis techniques, or tools.
35
- Identify and describe innovative methodological contributions and how these may have impacted research outcomes over time.
36
- Theoretical Progression
37
-
38
- Outline the theoretical foundations and highlight their chronological development.
39
- Connect findings to existing theories, noting how interpretations or theoretical perspectives have evolved.
40
- Identify theoretical advances, challenges, or shifts and their relationship to the timeline.
41
- Practical Applications and Temporal Shifts
42
-
43
- Discuss real-world applications over time, noting how findings have influenced industry practices or technology adoption.
44
- Highlight evolving practical use cases and how implementation considerations have changed with advances in research.
45
- Research Gaps and Future Directions
46
-
47
- Identify limitations in studies across time periods, noting any improvement or persistent gaps.
48
- Point out unexplored areas and suggest specific future research directions informed by chronological developments in the field.
49
- Formatting and Style:
50
-
51
- Organize the summary with clear sections that reflect the temporal progression.
52
- Maintain an academic tone, using specific examples, dates, and quotes where relevant.
53
- Clearly identify and label sections to enhance readability, and acknowledge any limitations in the available context.
54
- """
55
-
56
- self.papers = None
57
- self.search_agent_response = ""
58
-
59
- def solve(self, query):
60
- # Check if search has been performed
61
- if not os.path.exists("vector_db"):
62
- st.warning("No papers loaded. Performing search first...")
63
- search_agent = SearchAgent()
64
- self.search_agent_response, self.papers = search_agent.solve(query)
65
-
66
- # Load vector store
67
- vector_db = FAISS.load_local("vector_db", embeddings, index_name="base_and_adjacent", allow_dangerous_deserialization=True)
68
-
69
- # Get chat history
70
- chat_history = st.session_state.get("chat_history", [])
71
- chat_history_text = "\n".join([f"{sender}: {msg}" for sender, msg in chat_history[-5:]])
72
-
73
- # Get relevant chunks from all papers
74
- retrieved = vector_db.as_retriever(
75
- search_kwargs={"k": 10} # Increase number of chunks to get broader context
76
- ).get_relevant_documents(query)
77
-
78
- # Organize context by paper
79
- context = self._organize_context(retrieved)
80
-
81
- # Generate summary
82
- full_prompt = self.prompt.format(
83
- chat_history=chat_history_text,
84
- context=context
85
- )
86
-
87
- response = self.model.generate_content(str(self.search_agent_response) + full_prompt)
88
- return response.text, self.papers
89
-
90
- def _organize_context(self, documents):
91
- """
92
- Organizes retrieved chunks by paper and creates a structured context.
93
- """
94
- # Group chunks by paper
95
- paper_chunks = {}
96
- for doc in documents:
97
- paper_id = doc.metadata.get('source', 'unknown')
98
- if paper_id not in paper_chunks:
99
- paper_chunks[paper_id] = []
100
- paper_chunks[paper_id].append(doc.page_content)
101
-
102
- # Create structured context
103
- organized_context = []
104
- for paper_id, chunks in paper_chunks.items():
105
- paper_context = f"\nPaper: {paper_id}\n"
106
- paper_context += "\n".join(chunks)
107
- organized_context.append(paper_context)
108
-
109
  return "\n\n".join(organized_context)
 
1
+ from langchain.vectorstores import FAISS
2
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
3
+ import os
4
+ import streamlit as st
5
+ from agents import SearchAgent
6
+ from config.config import model
7
+
8
+
9
+
10
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
11
+
12
class SummarizationAgent:
    """Produce a timeline-oriented summary of papers held in a local FAISS index.

    The agent retrieves the chunks most relevant to a query from the
    ``vector_db`` store, groups them by source paper, fills the prompt
    template with that context plus recent chat history, and asks the
    configured model for a summary.
    """

    def __init__(self):
        """Set up the model handle, the prompt template and empty result state."""
        # `model` comes from config.config; this class only relies on it
        # exposing generate_content(prompt) whose result has a `.text`.
        self.model = model
        # Template with two placeholders, {chat_history} and {context},
        # filled in by solve() through str.format().
        self.prompt = """You are a research assistant tasked with synthesizing findings from multiple academic papers over time. Your goal is to create a comprehensive summary that highlights key trends, thematic developments, and methodological evolution within a given timeframe.

Given the following context, analyze the papers to produce a structured summary:

Previous conversation:
{chat_history}

Papers context:
{context}

Guidelines for timeline-based summarization:

Key Findings and Trends Over Time

Identify major discoveries and conclusions, highlighting how they have developed chronologically.
Note emerging trends, consensus, and any evolving contradictions across papers, especially in response to new technologies or shifts in the field.
Present statistical evidence and experimental results in relation to time, pointing out any measurable improvements or declines over the years.
Methodological Evolution

Compare and contrast research approaches across different time periods, emphasizing changes or advances in data collection, analysis techniques, or tools.
Identify and describe innovative methodological contributions and how these may have impacted research outcomes over time.
Theoretical Progression

Outline the theoretical foundations and highlight their chronological development.
Connect findings to existing theories, noting how interpretations or theoretical perspectives have evolved.
Identify theoretical advances, challenges, or shifts and their relationship to the timeline.
Practical Applications and Temporal Shifts

Discuss real-world applications over time, noting how findings have influenced industry practices or technology adoption.
Highlight evolving practical use cases and how implementation considerations have changed with advances in research.
Research Gaps and Future Directions

Identify limitations in studies across time periods, noting any improvement or persistent gaps.
Point out unexplored areas and suggest specific future research directions informed by chronological developments in the field.
Formatting and Style:

Organize the summary with clear sections that reflect the temporal progression.
Maintain an academic tone, using specific examples, dates, and quotes where relevant.
Clearly identify and label sections to enhance readability, and acknowledge any limitations in the available context.
"""

        # Neither attribute is reassigned inside this class; solve() returns
        # `papers` and prepends `search_agent_response` to the prompt, so
        # both keep these defaults unless a caller sets them.
        self.papers = None
        self.search_agent_response = ""

    def solve(self, query):
        """Summarize the stored papers with respect to ``query``.

        The FAISS index is loaded from the ``vector_db`` directory on every
        call; this method does not build it, so the index must already
        exist on disk.

        Args:
            query: Text used to retrieve the most relevant chunks.

        Returns:
            A ``(summary_text, papers)`` tuple, where ``summary_text`` is the
            model's ``.text`` output and ``papers`` is ``self.papers``.
        """
        # NOTE(security): allow_dangerous_deserialization=True lets FAISS
        # unpickle the stored docstore. Only point this at an index that
        # this application wrote itself, never at untrusted files.
        vector_db = FAISS.load_local(
            "vector_db",
            embeddings,
            index_name="base_and_adjacent",
            allow_dangerous_deserialization=True,
        )

        # Only the five most recent (sender, message) turns go into the prompt.
        chat_history = st.session_state.get("chat_history", [])
        chat_history_text = "\n".join(
            f"{sender}: {msg}" for sender, msg in chat_history[-5:]
        )

        # k=10 pulls a broader set of chunks so several papers are represented.
        # `invoke` replaces the deprecated `get_relevant_documents`.
        retriever = vector_db.as_retriever(search_kwargs={"k": 10})
        retrieved = retriever.invoke(query)

        context = self._organize_context(retrieved)
        full_prompt = self.prompt.format(
            chat_history=chat_history_text,
            context=context,
        )

        # Any search-agent output set on the instance is placed before the prompt.
        response = self.model.generate_content(
            str(self.search_agent_response) + full_prompt
        )
        return response.text, self.papers

    def _organize_context(self, documents):
        """Group retrieved chunks by paper and render them as one text block.

        Args:
            documents: Iterable of objects with a ``metadata`` mapping and a
                ``page_content`` string. Chunks whose metadata has no
                ``'source'`` key are grouped under ``'unknown'``.

        Returns:
            One section per paper, in order of first appearance, each of the
            form ``"\\nPaper: <id>\\n<chunk>\\n<chunk>..."``; sections are
            separated by a blank line. Empty input yields ``""``.
        """
        # dicts preserve insertion order, so papers stay in retrieval order.
        paper_chunks = {}
        for doc in documents:
            paper_id = doc.metadata.get('source', 'unknown')
            paper_chunks.setdefault(paper_id, []).append(doc.page_content)

        return "\n\n".join(
            f"\nPaper: {paper_id}\n" + "\n".join(chunks)
            for paper_id, chunks in paper_chunks.items()
        )