# Future_Directions / src/streamlit_app.py
# (Hugging Face Spaces page residue below, preserved as comments so the file parses)
# shubham142000's picture — Update src/streamlit_app.py
# 2d997a7 verified
# # import os
# # import json
# # import streamlit as st
# # import pandas as pd
# # import plotly.express as px
# # from together import Together
# # from dotenv import load_dotenv
# # import re
# # # -------------------#
# # # Secure API key load
# # # -------------------#
# # load_dotenv()
# # TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", "987adcf573b9658c775b671270aef959b3d38793771932f372f9f2a9ed5b78bf")
# # client = Together(api_key=TOGETHER_API_KEY)
# # # -------------------#
# # # Streamlit UI setup
# # # -------------------#
# # st.set_page_config(page_title="FutureScope: Research Direction Explorer", layout="wide")
# # st.markdown("""
# # <style>
# # body {
# # background: linear-gradient(135deg, #0f2027, #203a43, #2c5364);
# # color: #FFFFFF;
# # }
# # h1, h2, h3 {
# # text-align: center;
# # color: #FFD700;
# # font-family: 'Poppins', sans-serif;
# # }
# # .footer {
# # position: fixed;
# # left: 0;
# # bottom: 0;
# # width: 100%;
# # color: white;
# # text-align: center;
# # padding: 10px;
# # background-color: rgba(0,0,0,0.4);
# # }
# # .stButton > button {
# # background-color: #FFD700 !important;
# # color: black !important;
# # font-weight: bold;
# # border-radius: 10px;
# # }
# # </style>
# # """, unsafe_allow_html=True)
# # # -------------------#
# # # App Title
# # # -------------------#
# # st.markdown("<h1>๐Ÿงญ FutureScope: Research Direction Explorer</h1>", unsafe_allow_html=True)
# # st.markdown("<p style='text-align:center;'>Discover how your research area evolved and where it's heading next ๐Ÿš€</p>", unsafe_allow_html=True)
# # # -------------------#
# # # User Input
# # # -------------------#
# # user_topic = st.text_input("๐Ÿ” Enter your research topic", placeholder="e.g. Graph Neural Networks for Drug Discovery")
# # # -------------------#
# # # Main Logic
# # # -------------------#
# # if st.button("Generate Research Insights"):
# # if not user_topic.strip():
# # st.warning("โš ๏ธ Please enter a valid research topic.")
# # else:
# # with st.spinner("Analyzing topic evolution and forecasting future directions... โณ"):
# # # Prompt Design
# # prompt = f"""
# # You are a world-class AI research assistant specialized in analyzing research trends.
# # Given the topic: "{user_topic}", perform the following:
# # 1. Summarize how this research area has evolved in the past 10โ€“15 years.
# # 2. Identify key milestones and subfields in a timeline format.
# # 3. Predict 3โ€“5 future research directions and explain why each matters.
# # Return the output strictly in JSON format like this:
# # {{
# # "evolution_summary": "...",
# # "timeline": [{{"year": ..., "trend": "..."}}, ...],
# # "future_directions": [{{"title": "...", "reason": "..."}}, ...]
# # }}
# # """
# # # Call Together API
# # response = client.chat.completions.create(
# # model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
# # messages=[{"role": "user", "content": prompt}]
# # )
# # raw_content = response.choices[0].message.content
# # # -------------------#
# # # JSON Cleaning & Parsing
# # # -------------------#
# # def extract_json(text):
# # """Extract valid JSON portion from the model response."""
# # text = text.strip()
# # text = re.sub(r"^```json|```$", "", text).strip() # remove code fences
# # match = re.search(r'\{.*\}', text, re.DOTALL)
# # if match:
# # return match.group(0)
# # return text
# # cleaned = extract_json(raw_content)
# # try:
# # data = json.loads(cleaned)
# # except Exception as e:
# # st.error(f"โš ๏ธ Failed to parse JSON: {e}")
# # st.text_area("Raw Response", raw_content, height=300)
# # st.stop()
# # # -------------------#
# # # Display Results
# # # -------------------#
# # st.markdown("## ๐Ÿงฉ Evolution Summary")
# # st.markdown(f"<div style='background:#1e2a38;padding:15px;border-radius:10px;'>{data['evolution_summary']}</div>", unsafe_allow_html=True)
# # # Timeline Chart
# # if "timeline" in data and len(data["timeline"]) > 0:
# # df = pd.DataFrame(data["timeline"])
# # if "year" in df.columns and "trend" in df.columns:
# # fig = px.scatter(df, x="year", y="trend", title="๐Ÿ“ˆ Topic Evolution Over Time",
# # size=[10]*len(df), text="trend", color_discrete_sequence=["gold"])
# # fig.update_traces(textposition='top center', marker=dict(symbol="circle"))
# # fig.update_layout(template="plotly_dark", height=500)
# # st.plotly_chart(fig, use_container_width=True)
# # else:
# # st.warning("Timeline data invalid โ€” showing raw table:")
# # st.dataframe(df)
# # # Future Directions
# # st.markdown("## ๐Ÿ”ฎ Predicted Future Directions")
# # for item in data.get("future_directions", []):
# # st.markdown(f"""
# # <div style='background:#142733;padding:15px;margin:10px;border-radius:10px;'>
# # <h4>๐Ÿง  {item['title']}</h4>
# # <p>{item['reason']}</p>
# # </div>
# # """, unsafe_allow_html=True)
# # # Tools: Copy / Download
# # col1, col2 = st.columns(2)
# # with col1:
# # if st.button("๐Ÿ“‹ Copy Insights"):
# # st.write("Copied to clipboard! (Use Ctrl+C manually to copy)")
# # with col2:
# # st.download_button(
# # label="๐Ÿ’พ Download JSON",
# # data=json.dumps(data, indent=2),
# # file_name=f"{user_topic.replace(' ','_')}_future_directions.json",
# # mime="application/json"
# # )
# # # -------------------#
# # # Footer
# # # -------------------#
# # st.markdown("<div class='footer'>ยฉ Group 6 ILP TCS Research ", unsafe_allow_html=True)
import os
import json
import streamlit as st
import pandas as pd
import plotly.express as px
from together import Together
from dotenv import load_dotenv
import re
# -------------------#
# Secure API key load
# -------------------#
load_dotenv()
# Read the key from the environment (or a local .env) ONLY. The previous
# version shipped a live credential as the os.getenv() fallback — never
# embed secrets in source; rotate any key that was committed.
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    # Fail fast with a clear message instead of a confusing auth error later.
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Add it to your environment or a .env file."
    )
client = Together(api_key=TOGETHER_API_KEY)
# -------------------#
# Streamlit UI setup
# -------------------#
st.set_page_config(page_title="PreSearch : Research Direction Explorer", layout="wide")
# Detect the active Streamlit theme so the injected CSS can match it.
# st.get_option may raise on older/newer Streamlit versions; default to dark.
try:
    theme = st.get_option("theme.base")
except Exception:  # narrowed from bare `except:` — don't swallow SystemExit/KeyboardInterrupt
    theme = "dark"
# Palette shared by the CSS block and the result cards rendered below.
# Anything other than an explicit "light" (including None) falls through to dark.
if theme == "light":
    BACKGROUND_GRADIENT = "linear-gradient(135deg, #f9f9f9, #eaeaea, #dddddd)"
    TEXT_COLOR = "#000000"
    TITLE_COLOR = "#DAA520"
    CARD_BG = "#ffffff"
    FOOTER_BG = "rgba(0, 0, 0, 0.1)"
else:
    BACKGROUND_GRADIENT = "linear-gradient(135deg, #0f2027, #203a43, #2c5364)"
    TEXT_COLOR = "#FFFFFF"
    TITLE_COLOR = "#FFD700"
    CARD_BG = "#1e2a38"
    FOOTER_BG = "rgba(0, 0, 0, 0.4)"
# -------------------#
# Dynamic CSS Styling
# -------------------#
# Inject theme-aware CSS into the page. Doubled braces ({{ }}) render
# literal CSS braces inside the f-string; single braces interpolate the
# palette constants chosen in the theme block above.
st.markdown(f"""
<style>
body {{
background: {BACKGROUND_GRADIENT};
color: {TEXT_COLOR};
font-family: 'Poppins', sans-serif;
}}
h1, h2, h3 {{
text-align: center;
color: {TITLE_COLOR};
}}
.footer {{
position: fixed;
left: 0;
bottom: 0;
width: 100%;
color: {TEXT_COLOR};
text-align: center;
padding: 10px;
background-color: {FOOTER_BG};
}}
.stButton > button {{
background-color: {TITLE_COLOR} !important;
color: black !important;
font-weight: bold;
border-radius: 10px;
}}
div[data-testid="stMarkdownContainer"] p {{
color: {TEXT_COLOR};
}}
</style>
""", unsafe_allow_html=True)
# -------------------#
# App Title
# -------------------#
# unsafe_allow_html is required for the raw <h1>/<p> markup below.
st.markdown("<h1>๐Ÿงญ PreSearch: Research Direction Explorer</h1>", unsafe_allow_html=True)
st.markdown("<p style='text-align:center;'>Discover how your research area evolved and where it's heading next ๐Ÿš€</p>", unsafe_allow_html=True)
# -------------------#
# User Input
# -------------------#
# Free-text topic; validated (non-blank) when the generate button is pressed.
user_topic = st.text_input("๐Ÿ” Enter your research topic", placeholder="e.g. Graph Neural Networks for Drug Discovery")
# -------------------#
# Main Logic
# -------------------#
def extract_json(text):
    """Return the first JSON object embedded in *text*.

    LLMs often wrap their JSON answer in Markdown code fences; strip them
    (with or without the ``json`` language tag — the previous regex only
    handled ```` ```json ````), then grab the outermost ``{...}`` span.
    Falls back to the raw text so json.loads can surface a useful error.
    """
    text = text.strip()
    text = re.sub(r"^```(?:json)?|```$", "", text).strip()
    match = re.search(r'\{.*\}', text, re.DOTALL)
    return match.group(0) if match else text


if st.button("Generate Research Insights"):
    if not user_topic.strip():
        st.warning("โš ๏ธ Please enter a valid research topic.")
    else:
        with st.spinner("Analyzing topic evolution and forecasting future directions... โณ"):
            # Prompt Design — doubled braces render literal { } in the JSON template.
            prompt = f"""
You are a world-class AI research assistant specialized in analyzing research trends.
Given the topic: "{user_topic}", perform the following:
1. Summarize how this research area has evolved in the past 10โ€“15 years.
2. Identify key milestones and subfields in a timeline format.
3. Predict 3โ€“5 future research directions and explain why each matters.
Return the output strictly in JSON format like this:
{{
"evolution_summary": "...",
"timeline": [{{"year": ..., "trend": "..."}}, ...],
"future_directions": [{{"title": "...", "reason": "..."}}, ...]
}}
"""
            # Call Together API
            response = client.chat.completions.create(
                model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
                messages=[{"role": "user", "content": prompt}]
            )
            raw_content = response.choices[0].message.content
            cleaned = extract_json(raw_content)
            try:
                # Persist results in session_state: Streamlit reruns the whole
                # script on every widget interaction, so rendering results only
                # inside this button branch made them (and the Copy/Download
                # buttons) vanish the moment any nested button was clicked.
                st.session_state["insights_data"] = json.loads(cleaned)
                st.session_state["insights_topic"] = user_topic
            except Exception as e:
                st.session_state.pop("insights_data", None)
                st.error(f"โš ๏ธ Failed to parse JSON: {e}")
                st.text_area("Raw Response", raw_content, height=300)
                st.stop()

# -------------------#
# Display Results (rendered on every rerun while results exist, so the
# nested Copy/Download buttons keep working)
# -------------------#
if "insights_data" in st.session_state:
    data = st.session_state["insights_data"]
    topic = st.session_state.get("insights_topic", user_topic)
    st.markdown("## ๐Ÿงฉ Evolution Summary")
    # .get() guards against a free-form LLM response missing expected keys.
    st.markdown(f"<div style='background:{CARD_BG};padding:15px;border-radius:10px;color:{TEXT_COLOR};'>{data.get('evolution_summary', '')}</div>", unsafe_allow_html=True)
    # Timeline Chart
    if data.get("timeline"):
        df = pd.DataFrame(data["timeline"])
        if "year" in df.columns and "trend" in df.columns:
            fig = px.scatter(df, x="year", y="trend", title="๐Ÿ“ˆ Topic Evolution Over Time",
                             size=[10] * len(df), text="trend", color_discrete_sequence=["gold"])
            fig.update_traces(textposition='top center', marker=dict(symbol="circle"))
            fig.update_layout(template="plotly_dark" if theme == "dark" else "plotly_white", height=500)
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("Timeline data invalid โ€” showing raw table:")
            st.dataframe(df)
    # Future Directions
    st.markdown("## ๐Ÿ”ฎ Predicted Future Directions")
    for item in data.get("future_directions", []):
        st.markdown(f"""
<div style='background:{CARD_BG};padding:15px;margin:10px;border-radius:10px;color:{TEXT_COLOR};'>
<h4>๐Ÿง  {item.get('title', '')}</h4>
<p>{item.get('reason', '')}</p>
</div>
""", unsafe_allow_html=True)
    # Tools: Copy / Download
    col1, col2 = st.columns(2)
    with col1:
        if st.button("๐Ÿ“‹ Copy Insights"):
            st.write("Copied to clipboard! (Use Ctrl+C manually to copy)")
    with col2:
        st.download_button(
            label="๐Ÿ’พ Download JSON",
            data=json.dumps(data, indent=2),
            file_name=f"{topic.replace(' ', '_')}_future_directions.json",
            mime="application/json"
        )
# -------------------#
# Footer
# -------------------#
st.markdown("<div class='footer'>ยฉ Group 6 ILP TCS Research</div>", unsafe_allow_html=True)
# import os, json, time, re, requests, random
# import pandas as pd
# import streamlit as st
# from together import Together
# # =========================
# # 0๏ธโƒฃ Configuration & Setup
# # =========================
# st.set_page_config(page_title="๐Ÿ“š pResearch Retrieval", layout="wide", page_icon=":books:")
# st.title("๐Ÿค– **pResearch: Multi-Agent Research Retrieval System**")
# st.caption("Built with LLM-based reasoning, multi-agent intelligence, and human-in-loop control.")
# st.markdown("---")
# TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", "987adcf573b9658c775b671270aef959b3d38793771932f372f9f2a9ed5b78bf")
# SEMANTIC_API_KEY = os.getenv("SEMANTIC_API_KEY", "b2EsaPVVN1890PXdCeum37K9zKq4AYY46n8QyLvp")
# client = Together(api_key=TOGETHER_API_KEY)
# # =========================
# # Unified LLM Call
# # =========================
# @st.cache_data(show_spinner=False)
# def llm_call(prompt: str, temperature=0.2, max_retries=3):
# for attempt in range(max_retries):
# try:
# resp = client.chat.completions.create(
# model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
# messages=[{"role": "user", "content": prompt}],
# temperature=temperature
# )
# return resp.choices[0].message.content.strip()
# except Exception as e:
# time.sleep(1 + attempt)
# return "LLM error (see logs)"
# # ============================================================
# # 1๏ธโƒฃ Query Reformulator Agent
# # ============================================================
# def agent_query_reformulator(query: str):
# prompt = f"""
# You are an expert academic assistant.
# Reformulate the query below into 5 semantically diverse and rich alternatives
# that explore different perspectives (methods, datasets, applications, etc.)
# Query: "{query}"
# Respond in JSON format:
# {{
# "reformulated_queries": [
# {{ "id": 1, "query": "..." }},
# {{ "id": 2, "query": "..." }},
# {{ "id": 3, "query": "..." }},
# {{ "id": 4, "query": "..." }},
# {{ "id": 5, "query": "..." }}
# ]
# }}
# """
# output = llm_call(prompt)
# cleaned = re.sub(r"```json|```", "", output).strip()
# try:
# data = json.loads(cleaned)
# queries = [q["query"] for q in data.get("reformulated_queries", []) if "query" in q]
# except Exception:
# queries = []
# # fallback diversity
# while len(queries) < 5:
# alt = llm_call(f"Generate a diverse reformulation of: {query}")
# queries.append(alt[:300])
# queries = list(dict.fromkeys(queries))[:5]
# return {"original_query": query, "reformulated_queries": [{"id": i+1, "query": q} for i, q in enumerate(queries)]}
# # ============================================================
# # 2๏ธโƒฃ Retriever Agent (Semantic Scholar)
# # ============================================================
# def agent_retriever(query, top_k=20):
# url = "https://api.semanticscholar.org/graph/v1/paper/search"
# headers = {"x-api-key": SEMANTIC_API_KEY}
# params = {
# "query": query, "limit": top_k,
# "fields": "paperId,title,abstract,year,authors,url,venue,citationCount"
# }
# resp = requests.get(url, headers=headers, params=params)
# if resp.status_code != 200:
# return []
# return resp.json().get("data", [])
# # ============================================================
# # 3๏ธโƒฃ Reranker Agent
# # ============================================================
# def agent_reranker(query, papers):
# cleaned_papers = [p for p in papers if p.get("abstract")]
# random.shuffle(cleaned_papers)
# for batch_start in range(0, len(cleaned_papers), 5):
# batch = cleaned_papers[batch_start:batch_start+5]
# papers_str = "\n\n".join([
# f"[{i+1}] Title: {p.get('title','N/A')}\nAbstract: {p.get('abstract','')[:500]}"
# for i,p in enumerate(batch)
# ])
# prompt = f"""
# You are a relevance scoring agent.
# Given a research query and 5 papers, assign a score (0โ€“1) to each paper for its relevance.
# Query: {query}
# Papers:
# {papers_str}
# Respond strictly in JSON:
# {{ "results": [{{"id": 1, "score": 0.85}}, ...] }}
# """
# output = llm_call(prompt)
# cleaned = re.sub(r"```json|```", "", output).strip()
# try:
# results = json.loads(cleaned)["results"]
# for i,r in enumerate(results):
# cleaned_papers[batch_start+i]["semantic_score"] = r.get("score",0)
# except:
# for i in range(len(batch)):
# cleaned_papers[batch_start+i]["semantic_score"] = 0.0
# return sorted(cleaned_papers, key=lambda x: x.get("semantic_score",0), reverse=True)
# # ============================================================
# # 4๏ธโƒฃ Weighting Agent (Meta Scorer)
# # ============================================================
# def agent_weighting(papers):
# prompt = """
# You are an expert in bibliometrics.
# Assign importance weights (sum=1.0) for how papers should be ranked based on:
# - semantic_score (LLM relevance)
# - citationCount
# - recency
# - venue quality
# Return JSON:
# {"weights":{"semantic_score":0.55,"citations":0.25,"recency":0.15,"venue":0.05}}
# """
# output = llm_call(prompt)
# cleaned = re.sub(r"```json|```", "", output).strip()
# try:
# weights = json.loads(cleaned)["weights"]
# except:
# weights = {"semantic_score":0.55,"citations":0.25,"recency":0.15,"venue":0.05}
# total = sum(weights.values())
# return {k:v/total for k,v in weights.items()}
# # ============================================================
# # 5๏ธโƒฃ Meta-Scoring and Ranking
# # ============================================================
# def agent_meta_scorer(papers, weights):
# current_year = 2025
# prestige = {"CVPR":1.0,"ICCV":0.95,"ECCV":0.9,"NEURIPS":0.9,"ICML":0.85,"AAAI":0.8,"IJCAI":0.8,"ARXIV":0.4}
# for p in papers:
# sem = p.get("semantic_score",0)
# cit = min(p.get("citationCount",0)/1000,1.0)
# rec = max(0, 1 - (current_year - p.get("year",2000))/10)
# venue_name = (p.get("venue") or "").upper()
# ven = next((v for k,v in prestige.items() if k in venue_name), 0.3)
# p["final_score"] = (
# weights["semantic_score"]*sem +
# weights["citations"]*cit +
# weights["recency"]*rec +
# weights["venue"]*ven
# )
# return sorted(papers, key=lambda x: x["final_score"], reverse=True)
# # ============================================================
# # 6๏ธโƒฃ Critique Agent
# # ============================================================
# def agent_critique(papers, query):
# top_titles = [p["title"] for p in papers[:5]]
# prompt = f"""
# As a research critic, evaluate whether these top papers are relevant to:
# "{query}"
# Papers: {json.dumps(top_titles, indent=2)}
# Respond as JSON:
# {{ "critique": "...", "relevance_score": 0โ€“1 }}
# """
# output = llm_call(prompt)
# cleaned = re.sub(r"```json|```", "", output).strip()
# try:
# return json.loads(cleaned)
# except:
# return {"critique":"Automatic check fallback.","relevance_score":0.7}
# # ============================================================
# # 7๏ธโƒฃ Human-in-Loop Fallback
# # ============================================================
# def human_feedback_loop(papers):
# st.warning("โš ๏ธ Low relevance detected โ€” human feedback required.")
# for i,p in enumerate(papers[:3]):
# st.markdown(f"**{i+1}. {p['title']}** *(Score: {p['final_score']:.3f})*")
# st.caption(f"{p.get('abstract','')[:250]}...")
# choice = st.radio("Approve ranking?", ["Yes","No"], index=0)
# if choice == "No":
# st.info("๐Ÿ”„ Re-ranking by citation count.")
# papers = sorted(papers, key=lambda x: x.get("citationCount",0), reverse=True)
# return papers
# # ============================================================
# # 8๏ธโƒฃ Streamlit Master Orchestrator
# # ============================================================
# def run_pipeline(query, top_k=10):
# st.markdown("## ๐Ÿงฉ Stage 1: Query Reformulation")
# with st.spinner("Generating diverse reformulations..."):
# q_data = agent_query_reformulator(query)
# queries = [query] + [q["query"] for q in q_data["reformulated_queries"]]
# for q in queries[1:]:
# st.markdown(f"๐Ÿ”น *{q}*")
# st.markdown("## ๐Ÿ” Stage 2: Retrieval")
# all_papers = []
# progress = st.progress(0)
# for i, q in enumerate(queries):
# new_papers = agent_retriever(q, top_k)
# all_papers.extend(new_papers)
# progress.progress((i+1)/len(queries))
# time.sleep(0.3)
# progress.empty()
# st.success(f"โœ… Retrieved {len(all_papers)} papers in total.")
# st.markdown("## ๐Ÿง  Stage 3: Semantic Reranking")
# with st.spinner("Reranking papers semantically..."):
# reranked = agent_reranker(query, all_papers)
# st.info(f"Top paper after rerank: **{reranked[0]['title']}**")
# st.markdown("## โš–๏ธ Stage 4: Weighting Agent & Meta-Scoring")
# weights = agent_weighting(reranked)
# st.json(weights)
# meta_ranked = agent_meta_scorer(reranked, weights)
# st.markdown("## ๐Ÿ”Ž Stage 5: Critique Agent")
# critique = agent_critique(meta_ranked, query)
# st.info(f"**Critique:** {critique['critique']} | Relevance: {critique['relevance_score']:.2f}")
# if critique["relevance_score"] < 0.6:
# meta_ranked = human_feedback_loop(meta_ranked)
# df = pd.DataFrame(meta_ranked)
# st.download_button(
# label="๐Ÿ’พ Download Results as CSV",
# data=df.to_csv(index=False),
# file_name="final_ranked_papers.csv",
# mime="text/csv"
# )
# st.dataframe(df.head(20))
# st.success("๐ŸŽฏ Pipeline completed successfully!")
# # ============================================================
# # 9๏ธโƒฃ Streamlit UI
# # ============================================================
# with st.form("research_form"):
# query = st.text_input("Enter your research query:", "spatio-temporal action detection and localization")
# top_k = st.slider("Number of papers per reformulation", 5, 50, 10)
# run = st.form_submit_button("๐Ÿš€ Run Multi-Agent Retrieval")
# if run:
# run_pipeline(query, top_k)