Spaces:

mgbam
/

Researcher

Sleeping

App Files Files Community

mgbam committed on Apr 9, 2025

Commit

3b19854

verified ·

1 Parent(s): 179c437

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -33

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import xml.etree.ElementTree as ET
 import pandas as pd
 from io import StringIO
 import asyncio
 # ---------------------------
 # Model Loading & Caching
@@ -32,23 +33,19 @@ generator = load_text_generator()
 # Idea Generation Functions
 # ---------------------------
 def generate_ideas_with_hf(prompt):
-    # Use Hugging Face's text-generation pipeline.
-    # We use max_new_tokens so that new tokens are generated beyond the prompt.
     results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
     idea_text = results[0]['generated_text']
     return idea_text
 def generate_ideas_with_openai(prompt, api_key):
     """
-    Generates research ideas using OpenAI's GPT-3.5 model with streaming.
-    This function uses the latest OpenAI SDK v1.0 and asynchronous API calls.
     """
     openai.api_key = api_key
     output_text = ""
     async def stream_chat():
         nonlocal output_text
-        # Asynchronously call the chat completion endpoint with streaming enabled.
         response = await openai.ChatCompletion.acreate(
             model="gpt-3.5-turbo",
             messages=[
@@ -63,7 +60,6 @@ def generate_ideas_with_openai(prompt, api_key):
             text_piece = delta.get("content", "")
             output_text += text_piece
             st_text.text(output_text)
     asyncio.run(stream_chat())
     return output_text
@@ -72,14 +68,13 @@ def generate_ideas_with_openai(prompt, api_key):
 # ---------------------------
 def fetch_arxiv_results(query, max_results=5):
     """
-    Queries arXiv's free API to fetch relevant papers using XML parsing.
     """
     base_url = "http://export.arxiv.org/api/query?"
     search_query = "search_query=all:" + query
     start = "0"
     max_results_str = str(max_results)
     query_url = f"{base_url}{search_query}&start={start}&max_results={max_results_str}"
     response = requests.get(query_url)
     results = []
     if response.status_code == 200:
@@ -88,44 +83,48 @@ def fetch_arxiv_results(query, max_results=5):
         for entry in root.findall("atom:entry", ns):
             title_elem = entry.find("atom:title", ns)
             title = title_elem.text.strip() if title_elem is not None else ""
             summary_elem = entry.find("atom:summary", ns)
             summary = summary_elem.text.strip() if summary_elem is not None else ""
             published_elem = entry.find("atom:published", ns)
             published = published_elem.text.strip() if published_elem is not None else ""
             link_elem = entry.find("atom:id", ns)
             link = link_elem.text.strip() if link_elem is not None else ""
-            authors = []
-            for author in entry.findall("atom:author", ns):
-                name_elem = author.find("atom:name", ns)
-                if name_elem is not None:
-                    authors.append(name_elem.text.strip())
-            authors_str = ", ".join(authors)
             results.append({
                 "title": title,
                 "summary": summary,
                 "published": published,
                 "link": link,
-                "authors": authors_str
             })
         return results
     else:
         return []
 # ---------------------------
 # Streamlit Application Layout
 # ---------------------------
 st.title("Graph of AI Ideas Application with arXiv Integration and OpenAI SDK v1.0")
-# Sidebar Configuration
 st.sidebar.header("Configuration")
 generation_mode = st.sidebar.selectbox("Select Idea Generation Mode",
                                        ["Hugging Face Open Source", "OpenAI GPT-3.5 (Streaming)"])
 openai_api_key = st.sidebar.text_input("OpenAI API Key (for GPT-3.5 Streaming)", type="password")
 # --- Section 1: arXiv Paper Search ---
 st.header("arXiv Paper Search")
@@ -156,13 +155,10 @@ paper_abstract = st.text_area("Enter the research paper abstract:", height=200)
 if st.button("Generate Ideas"):
     if paper_abstract.strip():
         st.subheader("Summarized Abstract")
-        # Summarize the abstract to capture its key points.
         summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
         summary_text = summary[0]['summary_text']
         st.write(summary_text)
         st.subheader("Generated Research Ideas")
-        # Build a combined prompt with the abstract and its summary.
         prompt = (
             f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
             f"Paper Abstract:\n{paper_abstract}\n\n"
@@ -183,13 +179,16 @@ if st.button("Generate Ideas"):
     else:
         st.error("Please enter a research paper abstract.")
-# --- Section 3: Knowledge Graph Visualization ---
 st.header("Knowledge Graph Visualization")
 st.markdown(
-    "Simulate a knowledge graph by entering paper details and their citation relationships in CSV format:\n\n"
     "**PaperID,Title,CitedPaperIDs** (CitedPaperIDs separated by ';').\n\n"
-    "Example:\n\n```\n1,Paper A,2;3\n2,Paper B,\n3,Paper C,2\n```"
 )
 papers_csv = st.text_area("Enter paper details in CSV format:", height=150)
 if st.button("Generate Knowledge Graph"):
@@ -204,24 +203,47 @@ if st.button("Generate Knowledge Graph"):
                 cited_list = [c.strip() for c in cited.split(';') if c.strip()]
                 data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
         if data:
-            # Build a directed graph using NetworkX.
             G = nx.DiGraph()
             for paper in data:
                 G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
                 for cited in paper["cited"]:
                     G.add_edge(paper["paper_id"], cited)
             st.subheader("Knowledge Graph")
-            # Create an interactive visualization using Pyvis.
             net = Network(height="500px", width="100%", directed=True)
-            for node, node_data in G.nodes(data=True):
-                net.add_node(node, label=node_data.get("title", str(node)))
-            for source, target in G.edges():
                 net.add_edge(source, target)
             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
             net.write_html(temp_file.name)
             with open(temp_file.name, 'r', encoding='utf-8') as f:
                 html_content = f.read()
             st.components.v1.html(html_content, height=500)
     else:
         st.error("Please enter paper details for the knowledge graph.")

 import pandas as pd
 from io import StringIO
 import asyncio
+import base64
 # ---------------------------
 # Model Loading & Caching
 # Idea Generation Functions
 # ---------------------------
 def generate_ideas_with_hf(prompt):
+    # Generate ideas with the Hugging Face text generator; max_new_tokens caps how much text is produced beyond the prompt.
     results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
     idea_text = results[0]['generated_text']
     return idea_text
 def generate_ideas_with_openai(prompt, api_key):
     """
+    Generates research ideas using OpenAI's GPT-3.5 (Streaming).
     """
     openai.api_key = api_key
     output_text = ""
     async def stream_chat():
         nonlocal output_text
         response = await openai.ChatCompletion.acreate(
             model="gpt-3.5-turbo",
             messages=[
             text_piece = delta.get("content", "")
             output_text += text_piece
             st_text.text(output_text)
     asyncio.run(stream_chat())
     return output_text
 # ---------------------------
 def fetch_arxiv_results(query, max_results=5):
     """
+    Queries arXiv's free API and parses the result using ElementTree.
     """
     base_url = "http://export.arxiv.org/api/query?"
     search_query = "search_query=all:" + query
     start = "0"
     max_results_str = str(max_results)
     query_url = f"{base_url}{search_query}&start={start}&max_results={max_results_str}"
     response = requests.get(query_url)
     results = []
     if response.status_code == 200:
         for entry in root.findall("atom:entry", ns):
             title_elem = entry.find("atom:title", ns)
             title = title_elem.text.strip() if title_elem is not None else ""
             summary_elem = entry.find("atom:summary", ns)
             summary = summary_elem.text.strip() if summary_elem is not None else ""
             published_elem = entry.find("atom:published", ns)
             published = published_elem.text.strip() if published_elem is not None else ""
             link_elem = entry.find("atom:id", ns)
             link = link_elem.text.strip() if link_elem is not None else ""
+            authors = [author.find("atom:name", ns).text.strip()
+                       for author in entry.findall("atom:author", ns)
+                       if author.find("atom:name", ns) is not None]
             results.append({
                 "title": title,
                 "summary": summary,
                 "published": published,
                 "link": link,
+                "authors": ", ".join(authors)
             })
         return results
     else:
         return []
+# ---------------------------
+# Utility Function: Graph Download Link
+# ---------------------------
+def get_download_link(file_path, filename="graph.html"):
+    """Return an HTML anchor tag that embeds the file's contents as a base64 data URI so it can be downloaded."""
+    with open(file_path, "r", encoding="utf-8") as f:
+        html_data = f.read()
+    b64 = base64.b64encode(html_data.encode()).decode()
+    href = f'<a href="data:text/html;base64,{b64}" download="{filename}">Download Graph as HTML</a>'
+    return href
 # ---------------------------
 # Streamlit Application Layout
 # ---------------------------
 st.title("Graph of AI Ideas Application with arXiv Integration and OpenAI SDK v1.0")
+# Sidebar: Configuration and Layout Options
 st.sidebar.header("Configuration")
 generation_mode = st.sidebar.selectbox("Select Idea Generation Mode",
                                        ["Hugging Face Open Source", "OpenAI GPT-3.5 (Streaming)"])
 openai_api_key = st.sidebar.text_input("OpenAI API Key (for GPT-3.5 Streaming)", type="password")
+layout_option = st.sidebar.selectbox("Select Graph Layout", ["Default", "Force Atlas 2"])
 # --- Section 1: arXiv Paper Search ---
 st.header("arXiv Paper Search")
 if st.button("Generate Ideas"):
     if paper_abstract.strip():
         st.subheader("Summarized Abstract")
         summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
         summary_text = summary[0]['summary_text']
         st.write(summary_text)
         st.subheader("Generated Research Ideas")
         prompt = (
             f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
             f"Paper Abstract:\n{paper_abstract}\n\n"
     else:
         st.error("Please enter a research paper abstract.")
+# --- Section 3: Knowledge Graph Visualization with Additional Features ---
 st.header("Knowledge Graph Visualization")
 st.markdown(
+    "Enter paper details and citation relationships in CSV format:\n\n"
     "**PaperID,Title,CitedPaperIDs** (CitedPaperIDs separated by ';').\n\n"
+    "Example:\n\n```\n1,Graph of AI Ideas: Leveraging Knowledge Graphs and LLMs for AI Research Idea Generation,2;3\n2,Fundamental Approaches in AI Literature,\n3,Applications of LLMs in Research Idea Generation,2\n```"
 )
+# Optional filter input for node titles.
+filter_text = st.text_input("Optional: Enter keyword to filter nodes in the graph:")
 papers_csv = st.text_area("Enter paper details in CSV format:", height=150)
 if st.button("Generate Knowledge Graph"):
                 cited_list = [c.strip() for c in cited.split(';') if c.strip()]
                 data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
         if data:
+            # Build the full graph.
             G = nx.DiGraph()
             for paper in data:
                 G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
                 for cited in paper["cited"]:
                     G.add_edge(paper["paper_id"], cited)
+            # Filter nodes if a keyword is provided.
+            if filter_text.strip():
+                filtered_nodes = [n for n, d in G.nodes(data=True) if filter_text.lower() in d.get("title", "").lower()]
+                if filtered_nodes:
+                    H = G.subgraph(filtered_nodes).copy()
+                else:
+                    H = nx.DiGraph()
+            else:
+                H = G
             st.subheader("Knowledge Graph")
+            # Create the Pyvis network.
             net = Network(height="500px", width="100%", directed=True)
+            # Add nodes with tooltips (show title on hover).
+            for node, node_data in H.nodes(data=True):
+                net.add_node(node, label=node_data.get("title", str(node)), title=node_data.get("title", "No Title"))
+            for source, target in H.edges():
                 net.add_edge(source, target)
+            # Apply layout based on the user's selection.
+            if layout_option == "Force Atlas 2":
+                net.force_atlas_2based()
+            # Write graph to temporary HTML file.
             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
             net.write_html(temp_file.name)
+            # Show the graph.
             with open(temp_file.name, 'r', encoding='utf-8') as f:
                 html_content = f.read()
             st.components.v1.html(html_content, height=500)
+            # Provide a download link for the graph.
+            st.markdown(get_download_link(temp_file.name), unsafe_allow_html=True)
     else:
         st.error("Please enter paper details for the knowledge graph.")