"""Fetch a fixed set of government FOIA / reading-room pages and archive them.

The pages listed in ``sources`` are downloaded concurrently, their raw
response text is stored in a local SQLite database, and the whole run is
exposed through a Gradio interface that returns the results as JSON.
"""
import asyncio

import aiohttp
import gradio as gr
from bs4 import BeautifulSoup  # noqa: F401 -- not used below; kept from the original imports
from sqlalchemy import Column, Integer, String, create_engine
# declarative_base is exported by sqlalchemy.orm (SQLAlchemy 1.4+), not by
# the top-level sqlalchemy package.
from sqlalchemy.orm import declarative_base, sessionmaker

# Define URLs for different sources
sources = {
    "NSA": "https://www.nsa.gov/about/foia/",
    "NSO": "https://www.archives.gov",
    "AATIP": "https://www.defense.gov/Explore/Spotlight/a-t/",
    "NCI": "https://www.cancer.gov/research",
    "NIC": "https://www.dni.gov/index.php/nic-home",
    "NRO": "https://www.nro.gov/foia-home/",
    "FBI": "https://vault.fbi.gov/",
    "CIA Historical Collections": "https://www.cia.gov/readingroom/historical-collections",
    "AEC Records": "https://www.archives.gov/research/guide-fed-records/groups/326.html",
    "DOE Records": "https://www.archives.gov/research/guide-fed-records/groups/434.html",
    "Intelligence.gov": "https://www.intelligence.gov/",
    "DIA Archives": "https://www.dia.mil/FOIA/",
    "EPA FOIA": "https://www.epa.gov/foia",
    "NASA FOIA": "https://www.nasa.gov/foia",
    "NOAA FOIA": "https://www.noaa.gov/foia",
    "FCC FOIA": "https://www.fcc.gov/general/foia-request-guide",
    "Department of the Interior FOIA": "https://www.doi.gov/foia",
    "National Archives Electronic Reading Room": "https://www.archives.gov/foia/electronic-reading-room",
    "NGA FOIA": "https://www.nga.mil/resources/foia.html",
    "DARPA FOIA": "https://www.darpa.mil/about-us/foia",
    # Add more sources as needed
}

# Total time budget for one request, in seconds.
REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=10)


# Async function to fetch data
async def fetch_data(url, session=None):
    """Download *url* and return the response body as text.

    Failures are reported in the return value instead of being raised, so
    that one unreachable source does not abort a concurrent batch.

    Args:
        url: Address to request with HTTP GET.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted,
            a temporary session is opened for this single request.

    Returns:
        The response text, or a string starting with
        ``"Error fetching data: "`` if the request failed, returned an
        error status, or timed out.
    """
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            return await fetch_data(url, own_session)

    try:
        async with session.get(url, timeout=REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            return await response.text()
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        # A total-timeout surfaces as asyncio.TimeoutError, which is not an
        # aiohttp.ClientError and has an empty message; fall back to the
        # exception class name so the error string is still informative.
        detail = str(e) or type(e).__name__
        return f"Error fetching data: {detail}"


# Async function to fetch all sources
async def fetch_all_sources(sources):
    """Fetch every URL in *sources* concurrently.

    Args:
        sources: Mapping of source name to URL.

    Returns:
        A dict mapping each source name to what ``fetch_data`` returned for
        its URL (page text or an error string).
    """
    # One session for the whole batch instead of one per URL.
    async with aiohttp.ClientSession() as session:
        pages = await asyncio.gather(
            *(fetch_data(url, session) for url in sources.values())
        )
    return dict(zip(sources.keys(), pages))


# Function to display sources
def display_sources():
    """Fetch all configured sources, persist the results and return them.

    Returns:
        The name -> text mapping produced by ``fetch_all_sources``.
    """
    # asyncio.run creates and closes its own event loop, so this also works
    # when the handler is invoked from a thread that has no current loop
    # (asyncio.get_event_loop() raises RuntimeError there).
    results = asyncio.run(fetch_all_sources(sources))
    store_data(results)
    return results


# Database setup
engine = create_engine('sqlite:///foia_archive.db')
Base = declarative_base()
# Built once at import time rather than on every store_data call.
_SessionFactory = sessionmaker(bind=engine)


class Document(Base):
    """One fetched page: the source name and the text retrieved for it."""

    __tablename__ = 'documents'

    id = Column(Integer, primary_key=True)
    source = Column(String)
    content = Column(String)


Base.metadata.create_all(engine)


def store_data(data):
    """Insert one ``Document`` row per item of *data* and commit.

    Args:
        data: Mapping of source name to the text fetched for that source.
    """
    documents = [
        Document(source=source, content=content)
        for source, content in data.items()
    ]
    if not documents:
        return

    # The context manager closes the session even if the commit fails.
    with _SessionFactory() as session:
        session.add_all(documents)
        session.commit()


# Gradio interface
app = gr.Interface(fn=display_sources, inputs=[], outputs="json")

if __name__ == "__main__":
    # Only start the web server when run as a script, not on import.
    app.launch()