| import streamlit as st |
| import requests |
| import PyPDF2 |
| from typing import Optional, Dict, List |
| import json |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from concurrent.futures import ThreadPoolExecutor |
| import xml.etree.ElementTree as ET |
| import re |
| from datetime import datetime |
| import time |
| from dotenv import load_dotenv |
| import os |
| import pandas as pd |
|
|
| |
# Pull secrets from a local .env file so API keys never live in source control.
load_dotenv()

# Perplexity chat-completions endpoint and credentials.
PERPLEXITY_API_KEY = os.environ.get("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"

# Sapling key is only needed by the "AI Detector" tab.
SAPLING_API_KEY = os.environ.get("SAPLING_API_KEY")
|
|
|
|
def call_perplexity_api(prompt: str) -> str:
    """Send *prompt* to Perplexity's chat-completions endpoint.

    Returns the assistant's reply text on success. On any request or
    response-parsing failure the error is surfaced via ``st.error`` and an
    empty string is returned so the Streamlit UI keeps running.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        # Low temperature: the extraction/summarisation tasks in this app
        # want stable, repeatable answers.
        "temperature": 0.3,
    }

    try:
        # A timeout keeps one stalled request from hanging the whole UI;
        # the previous version could block forever.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, ValueError) as e:
        # RequestException: network/HTTP errors; the rest: malformed JSON
        # bodies. Narrower than the old blanket `except Exception`, which
        # also swallowed genuine programming errors.
        st.error(f"API Error: {str(e)}")
        return ""
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Each page's text is followed by a newline. ``extract_text()`` may return
    ``None`` for image-only/scanned pages; those are treated as empty rather
    than raising a TypeError as the previous string concatenation did.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # join() avoids the quadratic cost of repeated `text +=` on large PDFs.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
|
|
|
|
def analyze_paper(text: str, category: str) -> str:
    """Ask the Perplexity API a category-specific question about *text*.

    Known categories get a tailored question; anything else falls back to a
    generic analysis prompt. Only the first 5000 characters of the paper are
    sent to keep the request small.
    """
    prompts = {
        "Journal": "In which journal was this research published:",
        "Journal Quality": "What is the quality or impact factor of the journal in which this research was published:",
        "No Of Citations": "How many times has this research paper been cited:",
        "Date Of Publications": "When was this research paper published:",
        "Title": "What is the title of this research paper:",
        "Abstract": "Provide a summarized version of the abstract of this paper:",
        "Author Keywords": "What keywords were provided by the authors for this research paper:",
        "Theories Used in The Paper": "What theories are utilized or referenced in this research paper:",
        "Context Used In the Paper": "What is the specific context or scenario used in this research:",
        "Methods and Material Used in This Paper": "What methods and materials are used in conducting this research:",
        "Antecedents and Problems": "What antecedents and problems are identified in this research:",
        "Decision and Frameworks To Solve the Problem": "What decision-making frameworks or solutions are proposed in this research:",
        "Outcomes": "What are the outcomes or results of this research:",
        "Study Findings": "What are the detailed findings of this research study:",
        "Conclusions": "What conclusions are drawn from this research:",
        "TSC ADO": "Provide details about the TSC ADO (Theory-Specific Constructs Applied in this research):"
    }

    excerpt = text[:5000]
    question = prompts.get(category)
    if question is None:
        prompt = f"Analyze the following text for the category '{category}':\n\nPaper text: {excerpt}"
    else:
        prompt = f"{question}\n\nPaper text: {excerpt}"
    return call_perplexity_api(prompt)
|
|
|
|
class ResearchAssistant:
    """Task-oriented wrapper around the Perplexity, Sapling, and arXiv APIs.

    Most methods return an OpenAI-style ``{"choices": [{"message": ...}]}``
    dict so the Streamlit layer can render every feature uniformly.
    """

    # Shared timeout (seconds) for outbound HTTP calls.
    REQUEST_TIMEOUT = 30
    # arXiv responses are Atom feeds; element lookups need this namespace.
    _ATOM = "{http://www.w3.org/2005/Atom}"

    def __init__(self, perplexity_key: str):
        # Kept for callers; the module-level helper reads the key from env.
        self.perplexity_key = perplexity_key

    def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
        """Answer *query* using only the chunks of *pdf_text* most relevant to it."""
        chunks = self._split_text(pdf_text)
        relevant_chunks = self._get_relevant_chunks(chunks, query)

        prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def generate_literature_review(self, topic: str) -> Dict:
        """Search arXiv for *topic* and synthesize a structured literature review."""
        try:
            papers = self._search_arxiv(topic)
            if not papers:
                return {"error": "No papers found on the topic"}

            papers_summary = "\n\n".join(
                [
                    f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
                    for p in papers
                ]
            )

            prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:

{papers_summary}

Structure the review as follows:
1. Introduction and Background
2. Current Research Trends
3. Key Findings and Themes
4. Research Gaps
5. Future Directions"""

            response_text = call_perplexity_api(prompt)
            return {"choices": [{"message": {"content": response_text}}]}
        except Exception as e:
            return {"error": f"Literature review generation failed: {str(e)}"}

    def ai_writer(self, outline: str, references: List[str]) -> Dict:
        """Draft a research paper from *outline*, incorporating *references*."""
        prompt = f"""Write a research paper following this structure:

Outline:
{outline}

References to incorporate:
{json.dumps(references)}

Instructions:
- Follow academic writing style
- Include appropriate citations
- Maintain logical flow
- Include introduction and conclusion"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def refine_response(self, response: str, column: str) -> str:
        """Rewrite *response* so it fits cleanly into the named CSV *column*."""
        prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:

Response: {response}

Ensure the response is clear, concise, and fits the context of the column."""

        return call_perplexity_api(prompt)

    def paraphrase(self, text: str) -> Dict:
        """Paraphrase *text* while keeping an academic tone and its meaning."""
        prompt = f"""Paraphrase the following text while:
- Maintaining academic tone
- Preserving key meaning
- Improving clarity

Text: {text}"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
        """Build a citation in the given *style* from a title/authors/year dict."""
        prompt = f"""Generate a {style} citation for:
Title: {paper_info['title']}
Authors: {', '.join(paper_info['authors'])}
Year: {paper_info['year']}

Follow exact {style} format guidelines."""

        response_text = call_perplexity_api(prompt)
        return {"citation": response_text}

    def detect_ai_content(self, text: str) -> Dict:
        """Score *text* with Sapling's AI detector (0 = human, 1 = AI).

        NOTE: the previous version also built a Perplexity prompt here but
        never sent it; Sapling is the only detector actually used, so that
        dead code has been removed.
        """
        response = requests.post(
            "https://api.sapling.ai/api/v1/aidetect",
            json={"key": SAPLING_API_KEY, "text": text},
            timeout=self.REQUEST_TIMEOUT,  # previously unbounded
        )
        st.info(
            "A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
        )

        if response.status_code == 200:
            return {"choices": [{"message": {"content": response.json()}}]}
        return {
            "error": f"Sapling API Error: {response.status_code} - {response.text}"
        }

    def _split_text(self, text: str) -> List[str]:
        """Chunk *text* into overlapping ~1000-char pieces for retrieval."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
        )
        return splitter.split_text(text)

    def _get_relevant_chunks(self, chunks: List[str], query: str, top_k: int = 3) -> str:
        """Return the *top_k* chunks sharing the most words with *query*.

        Scoring is a simple case-insensitive word-overlap count.
        """
        query_words = set(query.lower().split())
        scored_chunks = [
            (len(query_words & set(chunk.lower().split())), chunk)
            for chunk in chunks
        ]

        # Sort on score only. The old tuple sort also compared chunk text on
        # score ties, reordering equally-relevant chunks arbitrarily; a keyed
        # stable sort preserves document order among ties.
        scored_chunks.sort(key=lambda pair: pair[0], reverse=True)
        return "\n\n".join(chunk for _, chunk in scored_chunks[:top_k])

    def _search_arxiv(self, topic: str) -> List[Dict]:
        """Fetch up to five arXiv entries matching *topic*; [] on any failure."""
        try:
            query = "+AND+".join(topic.split())
            url = (
                "https://export.arxiv.org/api/query"
                f"?search_query=all:{query}&start=0&max_results=5"
            )
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return self._parse_arxiv_response(response.text)
        except Exception as e:
            # Best-effort: the caller treats an empty list as "no papers".
            print(f"arXiv search failed: {str(e)}")
            return []

    def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
        """Parse an arXiv Atom feed into a list of paper dicts; [] on failure."""
        try:
            root = ET.fromstring(response_text)
            papers = []
            for entry in root.findall(f"{self._ATOM}entry"):
                papers.append(
                    {
                        "id": entry.find(f"{self._ATOM}id").text,
                        "title": entry.find(f"{self._ATOM}title").text.strip(),
                        "summary": entry.find(f"{self._ATOM}summary").text.strip(),
                        "authors": [
                            author.find(f"{self._ATOM}name").text.strip()
                            for author in entry.findall(f"{self._ATOM}author")
                        ],
                        # "published" is an ISO timestamp; keep the date part.
                        "published": entry.find(f"{self._ATOM}published").text[:10],
                    }
                )
            return papers
        except Exception as e:
            print(f"arXiv response parsing failed: {str(e)}")
            return []
|
|
|
|
# Default analysis columns for the "Extract Data" tab; users can append more
# at runtime via session state.
_DEFAULT_CATEGORIES = [
    "Journal", "Journal Quality", "No Of Citations",
    "Date Of Publications", "Title", "Abstract", "Author Keywords",
    "Theories Used in The Paper", "Context Used In the Paper",
    "Methods and Material Used in This Paper",
    "Antecedents and Problems", "Decision and Frameworks To Solve the Problem",
    "Outcomes", "Study Findings", "Conclusions",
    "TSC ADO",
]


def _render_pdf_chat_tab(assistant):
    """Tab: upload a PDF and ask questions answered from its text."""
    st.header("Chat with PDF")

    col1, col2 = st.columns([3, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
    with col2:
        if st.button("Clear PDF"):
            st.session_state.pop("pdf_text", None)
            st.session_state.pop("pdf_name", None)
            st.rerun()

    if not uploaded_file:
        return

    # Re-extract when a *different* file is uploaded. The previous code
    # cached the first upload forever ("pdf_text" in session state) and kept
    # answering questions about the wrong PDF.
    if (
        "pdf_text" not in st.session_state
        or st.session_state.get("pdf_name") != uploaded_file.name
    ):
        with st.spinner("Processing PDF..."):
            st.session_state.pdf_text = extract_text_from_pdf(uploaded_file)
            st.session_state.pdf_name = uploaded_file.name
        st.success("PDF processed successfully!")

    query = st.text_input("Ask a question about the PDF")
    if query:
        with st.spinner("Analyzing..."):
            response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
        if "error" in response:
            st.error(response["error"])
        else:
            st.write(response["choices"][0]["message"]["content"])


def _render_literature_review_tab(assistant):
    """Tab: generate a structured literature review from an arXiv search."""
    st.header("Literature Review")
    topic = st.text_input("Enter research topic")
    if st.button("Generate Review") and topic:
        with st.spinner("Generating literature review..."):
            review = assistant.generate_literature_review(topic)
        if "error" in review:
            st.error(review["error"])
        else:
            st.write(review["choices"][0]["message"]["content"])


def _render_ai_writer_tab(assistant):
    """Tab: draft a paper from an outline plus a list of references."""
    st.header("AI Writer")
    outline = st.text_area("Enter paper outline")
    references = st.text_area("Enter references (one per line)")
    if st.button("Generate Paper") and outline:
        # Drop blank lines so stray newlines don't become empty references.
        ref_list = [r.strip() for r in references.splitlines() if r.strip()]
        with st.spinner("Writing paper..."):
            paper = assistant.ai_writer(outline, ref_list)
        if "error" in paper:
            st.error(paper["error"])
        else:
            st.write(paper["choices"][0]["message"]["content"])


def _render_extract_data_tab():
    """Tab: batch-analyze PDFs into one row per paper and offer a CSV download."""
    st.header("Extract Data")

    uploaded_files = st.file_uploader(
        "Upload multiple PDF files", type="pdf", accept_multiple_files=True
    )
    if "categories" not in st.session_state:
        # Copy so user additions never mutate the module-level default.
        st.session_state.categories = list(_DEFAULT_CATEGORIES)

    st.write("### Current Categories")
    st.write(st.session_state.categories)

    new_category = st.text_input("Add a new category")
    if st.button("Add Category"):
        if not new_category.strip():
            st.error("Category cannot be empty!")
        elif new_category in st.session_state.categories:
            st.warning(f"Category '{new_category}' already exists!")
        else:
            st.session_state.categories.append(new_category)
            st.success(f"Category '{new_category}' added!")

    st.write("### Updated Categories")
    st.write(st.session_state.categories)

    if uploaded_files and st.button("Process Papers"):
        _process_papers(uploaded_files, st.session_state.categories)


def _process_papers(uploaded_files, categories):
    """Run every category prompt over every uploaded PDF with a progress bar."""
    progress_bar = st.progress(0)
    status_text = st.empty()
    results = []
    total_steps = len(uploaded_files) * len(categories)

    for i, file in enumerate(uploaded_files):
        status_text.text(f"Processing {file.name}...")
        text = extract_text_from_pdf(file)
        paper_results = {"Filename": file.name}

        for j, category in enumerate(categories):
            status_text.text(f"Processing {file.name} - {category}")
            paper_results[category] = analyze_paper(text, category)
            progress_bar.progress((i * len(categories) + j + 1) / total_steps)
            # Crude rate limiting between successive Perplexity calls.
            time.sleep(1)

        results.append(paper_results)

    df = pd.DataFrame(results)
    st.download_button(
        label="Download Results as CSV",
        data=df.to_csv(index=False),
        file_name="research_papers_analysis.csv",
        mime="text/csv",
    )
    st.subheader("Analysis Results")
    st.dataframe(df)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)


def _render_paraphraser_tab(assistant):
    """Tab: paraphrase free-form text in an academic register."""
    st.header("Paraphraser")
    text = st.text_area("Enter text to paraphrase")
    if st.button("Paraphrase") and text:
        with st.spinner("Paraphrasing..."):
            result = assistant.paraphrase(text)
        if "error" in result:
            st.error(result["error"])
        else:
            st.write(result["choices"][0]["message"]["content"])


def _render_citation_tab(assistant):
    """Tab: build an APA/MLA/Chicago citation from title, authors, and year."""
    st.header("Citation Generator")
    col1, col2 = st.columns(2)
    with col1:
        title = st.text_input("Paper Title")
        authors = st.text_input("Authors (comma-separated)")
    with col2:
        year = st.text_input("Year")
        style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])

    if st.button("Generate Citation") and title:
        with st.spinner("Generating citation..."):
            citation = assistant.generate_citation(
                {
                    "title": title,
                    "authors": [a.strip() for a in authors.split(",")],
                    "year": year,
                },
                style,
            )
        if "error" in citation:
            st.error(citation["error"])
        else:
            st.code(citation["citation"], language="text")


def _render_ai_detector_tab(assistant):
    """Tab: score text with Sapling's AI-content detector."""
    st.header("AI Detector")
    text = st.text_area("Enter text to analyze")
    if st.button("Detect AI Content") and text:
        with st.spinner("Analyzing..."):
            result = assistant.detect_ai_content(text)
        if "error" in result:
            st.error(result["error"])
        else:
            st.write(result["choices"][0]["message"]["content"])


def main():
    """Entry point: wire each feature into its own Streamlit tab."""
    st.title("Research Copilot")

    # Without the Perplexity key every tab would fail; bail out early.
    if not PERPLEXITY_API_KEY:
        st.warning("Perplexity API key not found in environment variables.")
        return

    assistant = ResearchAssistant(PERPLEXITY_API_KEY)

    tabs = st.tabs(
        [
            "Chat with PDF",
            "Literature Review",
            "AI Writer",
            "Extract Data",
            "Paraphraser",
            "Citation Generator",
            "AI Detector",
        ]
    )

    with tabs[0]:
        _render_pdf_chat_tab(assistant)
    with tabs[1]:
        _render_literature_review_tab(assistant)
    with tabs[2]:
        _render_ai_writer_tab(assistant)
    with tabs[3]:
        _render_extract_data_tab()
    with tabs[4]:
        _render_paraphraser_tab(assistant)
    with tabs[5]:
        _render_citation_tab(assistant)
    with tabs[6]:
        _render_ai_detector_tab(assistant)


if __name__ == "__main__":
    main()
|
|