# Spaces status: Runtime error
import asyncio

import aiohttp
import gradio as gr
from bs4 import BeautifulSoup
from sqlalchemy import Column, Integer, String, create_engine
# declarative_base is provided by sqlalchemy.orm (SQLAlchemy 1.4+); the
# top-level sqlalchemy package does not export it.
from sqlalchemy.orm import declarative_base, sessionmaker
# Pages to fetch, keyed by the label under which each result is reported.
sources = {
    "NSA": "https://www.nsa.gov/about/foia/",
    "NSO": "https://www.archives.gov",
    "AATIP": "https://www.defense.gov/Explore/Spotlight/a-t/",
    "NCI": "https://www.cancer.gov/research",
    "NIC": "https://www.dni.gov/index.php/nic-home",
    "NRO": "https://www.nro.gov/foia-home/",
    "FBI": "https://vault.fbi.gov/",
    "CIA Historical Collections": "https://www.cia.gov/readingroom/historical-collections",
    "AEC Records": "https://www.archives.gov/research/guide-fed-records/groups/326.html",
    "DOE Records": "https://www.archives.gov/research/guide-fed-records/groups/434.html",
    "Intelligence.gov": "https://www.intelligence.gov/",
    "DIA Archives": "https://www.dia.mil/FOIA/",
    "EPA FOIA": "https://www.epa.gov/foia",
    "NASA FOIA": "https://www.nasa.gov/foia",
    "NOAA FOIA": "https://www.noaa.gov/foia",
    "FCC FOIA": "https://www.fcc.gov/general/foia-request-guide",
    "Department of the Interior FOIA": "https://www.doi.gov/foia",
    "National Archives Electronic Reading Room": "https://www.archives.gov/foia/electronic-reading-room",
    "NGA FOIA": "https://www.nga.mil/resources/foia.html",
    "DARPA FOIA": "https://www.darpa.mil/about-us/foia",
    # Add more sources as needed
}
async def fetch_data(url):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The response body as text on success. On failure, a string starting
        with ``"Error fetching data: "`` is returned instead of raising; this
        covers connection problems, HTTP error statuses (via
        ``raise_for_status``) and requests exceeding the 10-second total
        timeout.
    """
    # Passing a bare number as ``timeout`` is deprecated in aiohttp; the
    # supported form is a ClientTimeout object.
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                return await response.text()
    except aiohttp.ClientError as e:
        return f"Error fetching data: {str(e)}"
    except asyncio.TimeoutError:
        # The total-timeout expiry is raised as asyncio.TimeoutError, which is
        # not an aiohttp.ClientError, so it needs its own handler. Its str()
        # is empty, hence the explicit message.
        return "Error fetching data: request timed out"
async def fetch_all_sources(sources):
    """Fetch every URL in ``sources`` concurrently.

    Args:
        sources: Mapping of source name to URL.

    Returns:
        A dict with the same keys, in the same order, as ``sources``. Each
        value is what ``fetch_data`` returned for that URL, or an
        ``"Error fetching data: ..."`` string if that individual fetch raised.
    """
    names = list(sources)
    outcomes = await asyncio.gather(
        *(fetch_data(sources[name]) for name in names),
        # Without this, one unexpected exception would propagate and throw
        # away the results of every other source.
        return_exceptions=True,
    )

    def _as_text(outcome):
        # Convert a captured exception into the same error-string shape that
        # fetch_data uses, so every value stays a plain string.
        if isinstance(outcome, BaseException):
            detail = str(outcome) or type(outcome).__name__
            return f"Error fetching data: {detail}"
        return outcome

    return {name: _as_text(outcome) for name, outcome in zip(names, outcomes)}
def display_sources():
    """Fetch all configured sources, store the results, and return them.

    Returns:
        The dict produced by ``fetch_all_sources(sources)``.
    """
    # asyncio.run creates a fresh event loop and closes it afterwards.
    # asyncio.get_event_loop() raises RuntimeError when called from a thread
    # that has no current loop (e.g. a worker thread running this handler),
    # and the loop it returns is never closed.
    results = asyncio.run(fetch_all_sources(sources))
    store_data(results)
    return results
# Database setup: a SQLite file in the working directory, plus the declarative
# base class that the ORM models below inherit from.
engine = create_engine("sqlite:///foia_archive.db")
Base = declarative_base()
class Document(Base):
    """ORM model for one row of the ``documents`` table."""

    __tablename__ = "documents"

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Name of the source the content belongs to.
    source = Column(String)
    # Text stored for that source.
    content = Column(String)
# Emit CREATE TABLE for the models registered on Base, using the engine above.
Base.metadata.create_all(bind=engine)
def store_data(data):
    """Insert one ``Document`` row per entry of ``data`` in a single commit.

    Args:
        data: Mapping of source name to the content to store for it.

    Raises:
        Exception: Any error raised while adding or committing is re-raised
            after the transaction has been rolled back.
    """
    session = sessionmaker(bind=engine)()
    try:
        session.add_all(
            Document(source=source, content=content)
            for source, content in data.items()
        )
        session.commit()
    except Exception:
        # Leave no half-applied transaction behind before propagating.
        session.rollback()
        raise
    finally:
        # Always release the connection; the session was previously leaked.
        session.close()
# Gradio interface: no input components; the dict returned by display_sources
# is rendered as JSON.
app = gr.Interface(
    fn=display_sources,
    inputs=[],
    outputs="json",
)
app.launch()