Spaces:

smokxy
/

PaperFlux

Running

App Files Files Community

smokxy committed on Feb 13, 2025

Commit

1d07116

1 Parent(s): 18b9802

Initial design and tests

Browse files

Files changed (9) hide show

.env +14 -0
InitialDesign.png +0 -0
README.md +31 -0
paperflux/__init__.py +0 -0
poetry.lock +0 -0
pyproject.toml +27 -0
tests/README.md +3 -0
tests/api_test.py +60 -0
tests/pdf_workflow_test.py +92 -0

.env ADDED Viewed

	@@ -0,0 +1,14 @@

+# Hugging Face
+HF_API_KEY=your_huggingface_key
+# Gemini
+GEMINI_API_KEY=your_gemini_key
+# Redis
+REDIS_HOST=localhost
+REDIS_PORT=6379
+REDIS_DB=0
+# App Config
+DAILY_PAPER_LIMIT=
+CACHE_TTL=86400

InitialDesign.png ADDED Viewed

README.md ADDED Viewed

	@@ -0,0 +1,31 @@

+```
+paperflux/
+├── .env.example
+├── pyproject.toml
+├── poetry.lock
+├── README.md
+├── .gitignore
+├── src/
+│   ├── __init__.py
+│   ├── tools/
+│   │   ├── __init__.py
+│   │   ├── hf_tools/
+│   │   │   ├── __init__.py
+│   │   │   ├── paper_pdf_tool.py
+│   │   │   └── summarization_tool.py
+│   │   ├── cache/
+│   │   │   ├── __init__.py
+│   │   │   ├── redis_client.py       # Core Redis operations
+│   │   │   └── cache_interface.py    # Abstract base class
+│   │   └── cache_manager.py          # High-level cache operations
+│   ├── agents/
+│   │   ├── __init__.py
+│   │   └── agent.py
+│   ├── models/
+│   │   ├── __init__.py
+│   │   └── model.py           # Pydantic models for data validation
+│   ├── scheduler.py                 # Scheduled cache updates
+│   └── app.py                       # Gradio web app
+```
+The tree above is the planned agentic workflow design. The initial workflow will use the Gemini API key and will later be extended into an agentic system.

paperflux/__init__.py ADDED Viewed

File without changes

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,27 @@

+[project]
+name = "paperflux"
+version = "0.1.0"
+description = ""
+authors = [
+    {name = "kartikbhtt7",email = "kartikbhtt7@gmail.com"}
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "gradio (>=5.16.0,<6.0.0)",
+    "google-generativeai (>=0.8.4,<0.9.0)",
+    "redis (>=5.2.1,<6.0.0)",
+    "python-dotenv (>=1.0.1,<2.0.0)",
+    "requests (>=2.32.3,<3.0.0)",
+    "pypdf2 (>=3.0.1,<4.0.0)",
+    "apscheduler (>=3.11.0,<4.0.0)",
+    "python-multipart (>=0.0.20,<0.0.21)",
+    "httpx (>=0.28.1,<0.29.0)",
+    "markdown (>=3.7,<4.0)"
+]
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

tests/README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+```
+These are simple tests for individual components
+```

tests/api_test.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import aiohttp
+import asyncio
+import os
+from datetime import datetime
+# Hugging Face endpoint requested by fetch_papers for the daily papers list.
+API_URL = "https://huggingface.co/api/daily_papers"
+# arXiv PDF URL template; {id} is filled in with the paper id.
+PDF_BASE_URL = "https://arxiv.org/pdf/{id}.pdf"
+# Directory (relative to the working directory) where downloaded PDFs are saved.
+DOWNLOAD_DIR = "papers"
+async def fetch_papers(session):
+  async with session.get(API_URL) as response:
+    if response.status == 200:
+        return await response.json()
+    raise Exception(f"API request failed: {response.status}")
+async def download_pdf(session, paper_entry):
+  try:
+    paper_id = paper_entry["paper"]["id"]
+    pdf_url = PDF_BASE_URL.format(id=paper_id)
+    clean_id = paper_id.replace("/", "_")
+    filename = f"{datetime.now().date()}_{clean_id}.pdf"
+    filepath = os.path.join(DOWNLOAD_DIR, filename)
+    async with session.get(pdf_url) as response:
+        if response.status == 200:
+            content = await response.read()
+            with open(filepath, "wb") as f:
+                f.write(content)
+            return (paper_id, True)
+        return (paper_id, False)
+  except Exception as e:
+    print(f"Error downloading {paper_id}: {str(e)}")
+    return (paper_id, False)
+os.makedirs(DOWNLOAD_DIR, exist_ok=True)
+async def main():
+    async with aiohttp.ClientSession() as session:
+        papers = await fetch_papers(session)
+        print(f"Found {len(papers)} papers")
+        print(f"\nFound {len(papers)} papers:")
+        for i, paper_entry in enumerate(papers, 1):
+            paper = paper_entry.get("paper", {})
+            print(f"\nPaper {i}:")
+            print(f"ID: {paper.get('id')}")
+            print(f"Title: {paper.get('title')}")
+            print(f"Authors: {', '.join([author.get('name') for author in paper.get('authors', [])])}")
+            print(f"Published: {paper.get('publishedAt')}")
+            print(f"Summary: {paper.get('summary')[:200]}...")
+            print(f"PDF URL: {PDF_BASE_URL.format(id=paper.get('id'))}")
+        tasks = [download_pdf(session, paper) for paper in papers]
+        results = await asyncio.gather(*tasks)
+        successful = sum(1 for _, status in results if status)
+        print(f"Downloaded {successful}/{len(papers)} papers successfully")
+# Run the fetch-and-download workflow only when executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())

tests/pdf_workflow_test.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import os
+import google.generativeai as genai
+from google.generativeai.types import HarmCategory, HarmBlockThreshold
+from pathlib import Path
+GEMINI_API_KEY = ""
+genai.configure(api_key=GEMINI_API_KEY)
+class PaperAnalyzer:
+    def __init__(self):
+        self.model = genai.GenerativeModel('gemini-1.5-pro-latest')
+        self.safety_settings = {
+            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
+        }
+    def analyze_paper(self, pdf_path: str) -> str:
+        """
+        Process entire PDF with images using Gemini's native PDF handling
+        Returns detailed technical analysis including visual elements
+        """
+        try:
+            abs_path = Path(pdf_path).absolute()
+            print(f"Looking for PDF at: {abs_path}")
+            if not abs_path.exists():
+                available_files = list(abs_path.parent.glob('*'))
+                print(f"Available files: {available_files}")
+                return f"File not found: {abs_path}"
+            uploaded_file = genai.upload_file(str(abs_path))
+            uploaded_file = genai.upload_file(pdf_path)
+            prompt = f"""Analyze this research paper thoroughly, considering both text and visual elements:
+            Provide in depth explanation with all core mathematical concepts and intuition behind them.
+            1. Paper Structure Analysis:
+               - Identify key sections (Abstract, Methodology, Results, etc.)
+               - Map the paper's argument flow
+            2. Technical Content:
+               - Explain core innovations with equations/examples
+               - Analyze diagrams/figures and their significance
+               - Extract key algorithms/pseudocode
+            3. Critical Evaluation:
+               - Strengths/weaknesses of methodology
+               - Compare with cited works
+               - Suggest improvements
+            4. Visual Element Analysis:
+               - Describe important figures/diagrams
+               - Explain visual data representations
+               - Connect images to textual content
+            Format output in Markdown with these sections:
+            # Paper Title
+            ## Core Contribution
+            ## Technical Breakdown
+            ## Visual Analysis
+            ## Critical Assessment
+            ## Potential Applications
+            """
+            response = self.model.generate_content(
+                [prompt, uploaded_file],
+                safety_settings=self.safety_settings,
+                generation_config={"temperature": 0.2}
+            )
+            genai.delete_file(uploaded_file.name)
+            return response.text
+        except Exception as e:
+            return f"Analysis failed: {str(e)}"
+if __name__ == "__main__":
+    analyzer = PaperAnalyzer()
+    paper_path = r"papers/test_pdf.pdf"
+    print(f"Current working directory: {os.getcwd()}")
+    print(f"Path exists: {Path(paper_path).exists()}")
+    analysis = analyzer.analyze_paper(paper_path)
+    print(analysis)
+    with open("full_analysis.md", "w") as f:
+        f.write(analysis)