rairo commited on
Commit
5e32fef
·
verified ·
1 Parent(s): dff737d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -19
app.py CHANGED
@@ -34,6 +34,9 @@ def get_data(url):
34
  "List me all grants or funds, short summary of grant description, "
35
  "the organisations funding them, the value of the grant as an integer, "
36
  "the due date, eligible countries, sector and eligibility criteria for applicants."
 
 
 
37
  ),
38
  source=url,
39
  config=graph_config,
@@ -72,6 +75,8 @@ def process_multiple_urls(urls):
72
  if result and "grants" in result:
73
  all_data["grants"].extend(result["grants"])
74
  except Exception as e:
 
 
75
  st.error(f"⚠️ Error processing URL: {url} - {str(e)}")
76
  continue
77
 
@@ -81,35 +86,57 @@ def process_multiple_urls(urls):
81
 
82
 
83
  def convert_to_csv(data):
84
- df = pd.DataFrame(data["grants"])
85
- return df.to_csv(index=False).encode("utf-8")
 
 
 
 
86
 
87
 
88
  def convert_to_excel(data):
89
- df = pd.DataFrame(data["grants"])
90
- buffer = io.BytesIO()
91
- with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
92
- df.to_excel(writer, sheet_name="Grants", index=False)
93
- return buffer.getvalue()
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  def create_knowledge_base(data):
97
- documents = []
98
- for grant in data["grants"]:
99
- doc_parts = [f"{key.replace('_', ' ').title()}: {value}" for key, value in grant.items()]
100
- documents.append("\n".join(doc_parts))
101
 
102
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
103
- texts = text_splitter.create_documents(documents)
104
 
105
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
106
- vectorstore = FAISS.from_documents(texts, embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  llm = ChatGoogleGenerativeAI(
109
  model="gemini-2.0-flash-thinking-exp", google_api_key=GOOGLE_API_KEY, temperature=0
110
  )
111
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
112
- return ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), memory=memory)
 
113
 
114
 
115
  def get_shareable_link(file_data, file_name, file_type):
@@ -175,7 +202,10 @@ def main():
175
 
176
  # Data Preview and Download Options in Main Panel
177
  with st.expander(f"📊 Preview Grant Data {len(st.session_state.scraped_data['grants'])} grants"):
178
- st.dataframe(st.session_state.scraped_data["grants"])
 
 
 
179
 
180
  col1, col2, col3 = st.columns([1, 1, 2]) # Adjust column widths for better layout
181
 
@@ -223,7 +253,7 @@ def main():
223
  if query:
224
  if st.session_state.qa_chain:
225
  with st.spinner("Generating response..."):
226
- response = st.session_state.qa_chain({"question": query})
227
  st.session_state.chat_history.append({"query": query, "response": response["answer"]})
228
  else:
229
  st.error("Knowledge base not initialized. Please load data as knowledge base.")
 
34
  "List me all grants or funds, short summary of grant description, "
35
  "the organisations funding them, the value of the grant as an integer, "
36
  "the due date, eligible countries, sector and eligibility criteria for applicants."
37
+
38
+ "if you cant find grants return any useful information you can find"
39
+
40
  ),
41
  source=url,
42
  config=graph_config,
 
75
  if result and "grants" in result:
76
  all_data["grants"].extend(result["grants"])
77
  except Exception as e:
78
+ st.write(result)
79
+ all_data = result
80
  st.error(f"⚠️ Error processing URL: {url} - {str(e)}")
81
  continue
82
 
 
86
 
87
 
88
def convert_to_csv(data):
    """Serialize scraped grant data to UTF-8 CSV bytes.

    Accepts either the normal ``{"grants": [...]}`` payload or a bare
    list/dict of records (the error path in process_multiple_urls can
    store the raw result), falling back to the whole payload when the
    "grants" key is absent.

    Returns:
        bytes: UTF-8 encoded CSV, header row included, no index column.
    """
    try:
        records = data["grants"]
    except (KeyError, TypeError):
        # Narrowed from a bare ``except:`` — only a missing key or a
        # non-mapping payload should trigger the fallback; any real
        # pandas/serialization error must surface to the caller.
        records = data
    df = pd.DataFrame(records)
    return df.to_csv(index=False).encode("utf-8")
95
 
96
 
97
def convert_to_excel(data):
    """Serialize scraped grant data to an .xlsx workbook as raw bytes.

    Accepts either the normal ``{"grants": [...]}`` payload or a bare
    list/dict of records, mirroring convert_to_csv's fallback.

    Returns:
        bytes: the complete xlsx file content (single "Grants" sheet).
    """
    try:
        records = data["grants"]
    except (KeyError, TypeError):
        # Narrowed from a bare ``except:``; also removes the duplicated
        # writer code the original repeated in both branches.
        records = data
    df = pd.DataFrame(records)
    buffer = io.BytesIO()
    # NOTE(review): "xlsxwriter" engine requires the xlsxwriter package
    # at runtime — presumably installed with the app; confirm in deps.
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        df.to_excel(writer, sheet_name="Grants", index=False)
    return buffer.getvalue()
111
+
112
 
113
 
114
def create_knowledge_base(data):
    """Cache a pretty-printed JSON snapshot of the grant data in the
    Streamlit session, for chat_with_knowledge_base to read later."""
    serialized = json.dumps(data, indent=2)
    st.session_state.knowledge_base_json = serialized
 
 
117
 
 
 
118
 
119
def chat_with_knowledge_base(query):
    """Answer *query* with Gemini, grounded in the cached grant JSON.

    Reads the JSON snapshot stored by create_knowledge_base from
    ``st.session_state`` and asks the model to answer from it.

    Args:
        query: the user's free-text question.

    Returns:
        dict: always ``{"answer": <str>}`` — the caller in main() does
        ``response["answer"]``, which previously failed on both the
        success path (an AIMessage is not subscriptable by string) and
        the not-initialized path (a plain str was returned).
    """
    if "knowledge_base_json" not in st.session_state:
        return {"answer": "Knowledge base not initialized. Please load grant data first."}

    context = st.session_state.knowledge_base_json
    prompt = f"""
    You are an AI assistant that helps users analyze grant opportunities.
    Here is the extracted grant data in JSON format:

    {context}

    User's question: {query}
    Answer the question based on the provided grant data.
    """

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash-thinking-exp", google_api_key=GOOGLE_API_KEY, temperature=0
    )

    # llm.invoke returns an AIMessage; expose its text under "answer" so
    # the caller's response["answer"] lookup works.
    response = llm.invoke(prompt)
    return {"answer": response.content}
140
 
141
 
142
  def get_shareable_link(file_data, file_name, file_type):
 
202
 
203
  # Data Preview and Download Options in Main Panel
204
  with st.expander(f"📊 Preview Grant Data {len(st.session_state.scraped_data['grants'])} grants"):
205
+ try:
206
+ st.dataframe(st.session_state.scraped_data["grants"])
207
+ except:
208
+ st.dataframe(st.session_state.scraped_data)
209
 
210
  col1, col2, col3 = st.columns([1, 1, 2]) # Adjust column widths for better layout
211
 
 
253
  if query:
254
  if st.session_state.qa_chain:
255
  with st.spinner("Generating response..."):
256
+ response = chat_with_knowledge_base(query)
257
  st.session_state.chat_history.append({"query": query, "response": response["answer"]})
258
  else:
259
  st.error("Knowledge base not initialized. Please load data as knowledge base.")