# Hugging Face page header (not part of the program):
# author: googlesprojectzero
# commit: edb0e50 (verified) - "Update app.py"
import asyncio

import aiohttp
import gradio as gr
from bs4 import BeautifulSoup
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker
# Registry of pages to fetch: display name -> URL.
# Insertion order is preserved and determines the order of the results.
sources = {
    "NSA": "https://www.nsa.gov/about/foia/",
    "NSO": "https://www.archives.gov",
    "AATIP": "https://www.defense.gov/Explore/Spotlight/a-t/",
    "NCI": "https://www.cancer.gov/research",
    "NIC": "https://www.dni.gov/index.php/nic-home",
    "NRO": "https://www.nro.gov/foia-home/",
    "FBI": "https://vault.fbi.gov/",
    "CIA Historical Collections": "https://www.cia.gov/readingroom/historical-collections",
    "AEC Records": "https://www.archives.gov/research/guide-fed-records/groups/326.html",
    "DOE Records": "https://www.archives.gov/research/guide-fed-records/groups/434.html",
    "Intelligence.gov": "https://www.intelligence.gov/",
    "DIA Archives": "https://www.dia.mil/FOIA/",
    "EPA FOIA": "https://www.epa.gov/foia",
    "NASA FOIA": "https://www.nasa.gov/foia",
    "NOAA FOIA": "https://www.noaa.gov/foia",
    "FCC FOIA": "https://www.fcc.gov/general/foia-request-guide",
    "Department of the Interior FOIA": "https://www.doi.gov/foia",
    "National Archives Electronic Reading Room": "https://www.archives.gov/foia/electronic-reading-room",
    "NGA FOIA": "https://www.nga.mil/resources/foia.html",
    "DARPA FOIA": "https://www.darpa.mil/about-us/foia",
    # Add more sources as needed
}
async def fetch_data(url):
    """Download the body of ``url`` and return it as text.

    A fresh ``aiohttp.ClientSession`` is opened for each call. Network
    failures and timeouts are reported in-band instead of being raised, so a
    single unreachable source does not abort a batch of concurrent fetches.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The decoded response body on success; otherwise a string starting
        with ``"Error fetching data: "`` that describes the failure.
    """
    # ClientTimeout is aiohttp's documented way to express the 10-second
    # limit (a bare number is accepted only for backward compatibility).
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                return await response.text()
    except asyncio.TimeoutError:
        # A timeout is not an aiohttp.ClientError, and its str() is empty,
        # so it needs its own handler and an explicit message.
        return f"Error fetching data: request to {url} timed out"
    except aiohttp.ClientError as e:
        return f"Error fetching data: {str(e)}"
async def fetch_all_sources(sources):
    """Fetch every source concurrently and map each name to its result.

    Args:
        sources: Mapping of source name to URL.

    Returns:
        A dict with the same keys, in the same order, as ``sources``. Each
        value is what ``fetch_data`` returned for that URL, or an
        ``"Error fetching data: ..."`` string if that fetch raised.
    """
    # return_exceptions=True keeps one failing fetch from discarding the
    # results of all the others; failures come back as exception objects.
    outcomes = await asyncio.gather(
        *(fetch_data(url) for url in sources.values()),
        return_exceptions=True,
    )
    return {
        name: (
            f"Error fetching data: {outcome}"
            if isinstance(outcome, BaseException)
            else outcome
        )
        for name, outcome in zip(sources, outcomes)
    }
def display_sources():
    """Fetch every configured source, archive the results, and return them.

    Returns:
        Dict mapping each source name to the value produced by
        ``fetch_all_sources`` for it.
    """
    # asyncio.run creates and closes its own event loop, so this also works
    # in threads that have no current loop, where get_event_loop() raises.
    # NOTE(review): assumes no loop is already running in the calling thread.
    results = asyncio.run(fetch_all_sources(sources))
    store_data(results)
    return results
# Database setup: a SQLAlchemy engine for the SQLite file foia_archive.db.
_DATABASE_URL = 'sqlite:///foia_archive.db'
engine = create_engine(_DATABASE_URL)

# Base class that the declarative ORM models in this file inherit from.
Base = declarative_base()
class Document(Base):
    """ORM model for one row of the ``documents`` table."""

    __tablename__ = 'documents'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Name of the source the row was stored under.
    source = Column(String)
    # Text stored for that source.
    content = Column(String)
# Issue CREATE TABLE for the models registered on Base; runs at import time.
Base.metadata.create_all(engine)
def store_data(data):
    """Persist one ``Document`` row per entry of ``data``.

    Args:
        data: Mapping of source name to the content to store for it.

    Raises:
        Exception: Whatever the session raises while adding or committing;
            the transaction is rolled back before the error propagates.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        session.add_all(
            [
                Document(source=source, content=content)
                for source, content in data.items()
            ]
        )
        # One commit for the whole batch: either every row lands or none.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        # Always release the connection, even on failure.
        session.close()
# Gradio interface: takes no inputs, calls display_sources, renders JSON.
app = gr.Interface(
    fn=display_sources,
    inputs=[],
    outputs="json",
)

# Start serving as soon as the module is executed.
app.launch()