File size: 2,897 Bytes
8518177
00602f6
 
 
 
 
8518177
00602f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb0e50
 
00602f6
 
 
 
 
edb0e50
ffda678
8518177
00602f6
 
 
 
 
 
 
 
 
8518177
00602f6
 
 
 
 
8518177
00602f6
 
 
 
 
edb0e50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import asyncio

import aiohttp
import gradio as gr
from bs4 import BeautifulSoup
# declarative_base is exported by sqlalchemy.orm (SQLAlchemy 1.4+), not by the
# top-level sqlalchemy package.
from sqlalchemy import create_engine, Column, String, Integer
from sqlalchemy.orm import declarative_base, sessionmaker

# Pages to fetch, keyed by a short display label. Each value is the URL that is
# downloaded for that label; the label is reused as the key of the fetched
# results and as the `source` value of the stored row.
sources = {
    "NSA": "https://www.nsa.gov/about/foia/",
    "NSO": "https://www.archives.gov",  # NOTE(review): site root, not a specific page - confirm intended target
    "AATIP": "https://www.defense.gov/Explore/Spotlight/a-t/",
    "NCI": "https://www.cancer.gov/research",
    "NIC": "https://www.dni.gov/index.php/nic-home",
    "NRO": "https://www.nro.gov/foia-home/",
    "FBI": "https://vault.fbi.gov/",
    "CIA Historical Collections": "https://www.cia.gov/readingroom/historical-collections",
    "AEC Records": "https://www.archives.gov/research/guide-fed-records/groups/326.html",
    "DOE Records": "https://www.archives.gov/research/guide-fed-records/groups/434.html",
    "Intelligence.gov": "https://www.intelligence.gov/",
    "DIA Archives": "https://www.dia.mil/FOIA/",
    "EPA FOIA": "https://www.epa.gov/foia",
    "NASA FOIA": "https://www.nasa.gov/foia",
    "NOAA FOIA": "https://www.noaa.gov/foia",
    "FCC FOIA": "https://www.fcc.gov/general/foia-request-guide",
    "Department of the Interior FOIA": "https://www.doi.gov/foia",
    "National Archives Electronic Reading Room": "https://www.archives.gov/foia/electronic-reading-room",
    "NGA FOIA": "https://www.nga.mil/resources/foia.html",
    "DARPA FOIA": "https://www.darpa.mil/about-us/foia",
    # Add more sources as needed
}

# Async function to fetch data
async def fetch_data(url, timeout=10):
    """Download *url* with HTTP GET and return the response body as text.

    Failures are reported in-band instead of being raised, so one bad source
    does not abort a batch of concurrent fetches.

    Args:
        url: Absolute URL to request.
        timeout: Total time budget for the request, in seconds.

    Returns:
        The decoded response body on success; otherwise a string starting
        with ``"Error fetching data: "`` that describes the failure.
    """
    # Pass the budget as a ClientTimeout object; a bare number for ``timeout=``
    # is deprecated in recent aiohttp releases.
    budget = aiohttp.ClientTimeout(total=timeout)
    try:
        async with aiohttp.ClientSession(timeout=budget) as session:
            async with session.get(url) as response:
                # Turn 4xx/5xx responses into ClientResponseError.
                response.raise_for_status()
                return await response.text()
    except asyncio.TimeoutError:
        # A timeout is not an aiohttp.ClientError, and its str() is empty,
        # so it needs its own handler and its own message.
        return f"Error fetching data: timed out after {timeout} seconds"
    except aiohttp.ClientError as e:
        return f"Error fetching data: {e}"

# Async function to fetch all sources
async def fetch_all_sources(sources):
    """Fetch every URL in *sources* concurrently.

    Args:
        sources: Mapping of label to URL.

    Returns:
        dict with the same labels, in the same order, mapped to whatever
        ``fetch_data`` returned for the URL. If a fetch raised instead of
        returning, that label maps to an ``"Error fetching data: ..."``
        string so the other results are still delivered.
    """
    labels = list(sources)
    # return_exceptions=True keeps one failing fetch from discarding the
    # results of all the others.
    outcomes = await asyncio.gather(
        *(fetch_data(sources[label]) for label in labels),
        return_exceptions=True,
    )

    results = {}
    for label, outcome in zip(labels, outcomes):
        if isinstance(outcome, Exception):
            # repr() because some exceptions (e.g. timeouts) have an empty str().
            outcome = f"Error fetching data: {outcome!r}"
        elif isinstance(outcome, BaseException):
            # Cancellation and similar signals must not be turned into data.
            raise outcome
        results[label] = outcome
    return results

# Function to display sources
def display_sources():
    """Fetch all configured sources, persist the results, and return them.

    Returns:
        dict mapping each label in the module-level ``sources`` to the value
        ``fetch_all_sources`` produced for it.
    """
    # asyncio.run() creates and tears down its own event loop, so this works
    # from any thread. asyncio.get_event_loop() raises RuntimeError in a
    # thread that has no loop, which is where synchronous UI callbacks are
    # commonly executed.
    results = asyncio.run(fetch_all_sources(sources))

    store_data(results)

    return results

# Database setup
# SQLite database kept in the file foia_archive.db; the three-slash URL form
# denotes a path relative to the process's working directory.
engine = create_engine('sqlite:///foia_archive.db')
# Declarative base class that the ORM model below inherits from.
Base = declarative_base()

class Document(Base):
    """One stored fetch result: a source label and the text fetched for it."""
    __tablename__ = 'documents'
    id = Column(Integer, primary_key=True)  # integer primary key
    source = Column(String)  # label of the source, as passed in by store_data
    content = Column(String)  # text stored for that source by store_data

# Runs at import time: issues CREATE TABLE for the models registered on Base
# (SQLAlchemy's default is to skip tables that already exist).
Base.metadata.create_all(engine)

def store_data(data):
    """Persist fetched content as one ``Document`` row per source.

    All rows are written in a single transaction: either every entry of
    *data* is stored or none is.

    Args:
        data: Mapping of source label to the text to store for it.

    Raises:
        Exception: Any error raised while adding or committing is re-raised
            after the transaction has been rolled back.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        session.add_all(
            [Document(source=source, content=content)
             for source, content in data.items()]
        )
        session.commit()
    except Exception:
        # Leave the database untouched if anything went wrong mid-batch.
        session.rollback()
        raise
    finally:
        # Always release the connection held by the session.
        session.close()

# Gradio interface: no input components; the dict returned by
# display_sources is rendered with the "json" output component.
app = gr.Interface(
    fn=display_sources,
    inputs=[],
    outputs="json",
)
# Start the web UI as soon as the module is executed.
app.launch()