| import re |
| import json |
| import requests |
| import html as html_lib |
| import time |
| from typing import Optional |
| from fastapi import FastAPI |
| from fastapi.responses import HTMLResponse, PlainTextResponse |
| import random |
|
|
# FastAPI application object; the route handlers below are registered on it.
app = FastAPI()


# Base URL of the Pollinations text-generation API; the percent-encoded prompt
# is appended as the URL path (see call_pollinations).
POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"
|
|
| |
|
|
| |
# Prompt template for step 1 (generate_headlines): asks the model for a JSON
# table of contents. The doubled braces are literal braces that survive
# str.format(); only {topic} is substituted.
HEADLINES_PROMPT = """
You are an AI that produces a table of contents, for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown or code fences.
Format exactly:
{{
"title": "string",
"lead": "string",
"sections": [
{{
"heading": "string",
"subsections": [
{{
"subheading": "string"
}}
]
}}
],
"last_edited": "string" /* optional */
}}
"""
|
|
| |
# Prompt template for step 2 (generate_article_content): sends the TOC
# structure back to the model and asks it to add a "content" field to every
# section and subsection. {topic} and {structure_json} are substituted via
# str.format(); doubled braces are literal.
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.

Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.

Input Structure:
{structure_json}

Output Format Example:
{{
"sections": [
{{
"heading": "History",
"content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
"subsections": [
{{
"subheading": "Early developments",
"content": "In the early days, developments were slow and often theoretical..."
}}
]
}}
]
}}
"""
|
|
| |
| RAW_LOG = {} |
|
|
| |
def call_pollinations(prompt: str) -> str:
    """Call the Pollinations text API and return the raw response body.

    The prompt is percent-encoded into the URL path; the random ``seed``
    query parameter varies every call (presumably to defeat upstream
    response caching — TODO confirm against the Pollinations API docs).

    Args:
        prompt: Plain-text prompt to send to the model.

    Returns:
        The untouched response text (no stripping, no JSON parsing).

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or the 60 s timeout.
    """
    # SECURITY(review): the API token is hardcoded in source. Move it to an
    # environment variable or secret store before sharing/publishing this file.
    seed = random.randint(0, 999999)
    uri = (
        POLLINATIONS_URL
        + requests.utils.requote_uri(prompt)
        + f"?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true&seed={seed}"
    )
    r = requests.get(uri, timeout=60)
    r.raise_for_status()
    return r.text
|
|
def extract_json(text: str) -> dict:
    """Extract and parse the JSON object spanning the outermost braces.

    Takes the substring from the first ``{`` to the last ``}`` — tolerant of
    chatty model output that wraps the JSON in prose or code fences.

    Args:
        text: Raw model response expected to contain one JSON object.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If no brace-delimited span exists or it fails to parse;
            the message embeds truncated copies of the offending text.
    """
    start = text.find("{")
    end = text.rfind("}")
    # BUG FIX: the original computed `end = text.rfind("}") + 1` and then
    # tested `end == -1`, which can never be true after the +1 shift. Test
    # the raw rfind result before shifting instead.
    if start == -1 or end == -1 or start > end:
        raise ValueError(
            "No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000]
        )
    json_str = text[start:end + 1]
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        # Chain the cause so the original decode error is preserved.
        raise ValueError(
            f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}"
            f"\n\nRaw (truncated):\n{text[:2000]}"
        ) from e
|
|
def log_raw(topic: str, prompt: str, response: str):
    """Record one prompt/response exchange for *topic* in the RAW_LOG store."""
    if topic not in RAW_LOG:
        RAW_LOG[topic] = []
    RAW_LOG[topic].append((prompt, response))
|
|
| |
def generate_headlines(topic: str) -> dict:
    """Step 1: ask the model for the article structure (title, lead, TOC).

    Args:
        topic: URL path segment naming the article subject; underscores are
            turned into spaces when used as a fallback title.

    Returns:
        Parsed JSON dict guaranteed to contain "title", "lead", and
        "sections" keys (backfilled with defaults when the model omits them).

    Raises:
        ValueError: If the model response contains no parseable JSON.
        requests.RequestException: On network/HTTP failure.
    """
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)

    # Backfill fields the model may omit. setdefault() already leaves
    # existing keys untouched, so the original's
    # `data.setdefault(k, data.get(k, default))` was a redundant double
    # lookup — plain defaults are equivalent.
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
    return data
|
|
def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: fill in prose for every heading/subheading in one model call.

    The TOC is first slimmed down to only the fields the prompt template
    documents ("heading" and "subsections") before being serialized.
    """
    slim_sections = []
    for section in toc_structure.get("sections", []):
        slim_sections.append(
            {
                "heading": section.get("heading"),
                "subsections": section.get("subsections", []),
            }
        )
    structure_json = json.dumps({"sections": slim_sections}, indent=2)

    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    return extract_json(resp)
|
|
| |
| def esc(s): return html_lib.escape(s) if isinstance(s, str) else "" |
|
|
def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render the final HTML page from the fully-populated article JSON.

    Args:
        article: Dict with "title", "lead", and "sections" (each section
            carrying "heading"/"content" and optional "subsections"), plus an
            optional "last_edited" string for the footer.
        execution_time: Seconds taken to build the article; shown in the
            footer when provided.

    Returns:
        A complete HTML document as a single string.
    """
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))

    # Wikipedia-look stylesheet, inlined so the page is self-contained.
    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""

    # Document head, sidebar, and page header.
    parts = [
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        "<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
        f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
        "<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
        "<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
        "<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
        f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]

    if lead:
        parts.append(f"<p><strong>{lead}</strong></p>")

    # Table of contents, numbered to match the anchors emitted below.
    if article.get("sections"):
        parts.append("<div id='toc'><h2>Contents</h2><ul>")
        for i, sec in enumerate(article.get("sections", []), 1):
            parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
            if sec.get("subsections"):
                parts.append("<ul>")
                for j, sub in enumerate(sec.get("subsections", []), 1):
                    parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
                parts.append("</ul>")
        parts.append("</ul></div>")

    # Section and subsection bodies.
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content',''))}</p>")

    # Footer metadata (last-edited note and generation time, if available).
    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)

    # BUG FIX: the search-bar <script> used to be appended AFTER the closing
    # </body></html> tags, producing invalid HTML. Close the containers here
    # and defer </body></html> until the script has been emitted.
    parts.append(f"</main><footer>{footer_content}</footer></div></div>")

    # Search-bar handler: Enter redirects to /wikipedai/<encoded query>.
    js = """
<script>
document.getElementById('search_bar').addEventListener('keydown', function(event) {
    // Check if the key pressed was 'Enter'
    if (event.key === 'Enter') {
        // Prevent any default action
        event.preventDefault();

        // Get the user's query from the input field
        const query = document.getElementById('search_bar').value;

        // If the query is empty, do nothing
        if (!query) {
            return;
        }

        // URI-encode the query to handle special characters safely
        const encodedQuery = encodeURIComponent(query);

        // Construct the final URL for the API
        const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;

        // Redirect the browser to the API URL
        window.location.href = apiUrl;
    }
});
</script>
"""

    parts.append(js)
    parts.append("</body></html>")
    return "\n".join(parts)
|
|
| |
|
|
@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
    """Generate and serve a complete article page for *topic*.

    Pipeline: (1) fetch the TOC structure, (2) fetch prose for that
    structure, (3) merge the prose into the structure positionally,
    (4) render everything to HTML. Any failure returns a 500 page that
    embeds the escaped traceback.
    """
    start_time = time.time()
    RAW_LOG[topic] = []  # fresh raw log for this request

    try:
        structure = generate_headlines(topic)
        generated = generate_article_content(topic, structure)

        # Positional merge of generated prose into the TOC structure.
        # zip() stops at the shorter list, which matches the original
        # index-bounds checks: extra structure entries are simply left
        # without a "content" field.
        for sec, sec_content in zip(structure.get("sections", []),
                                    generated.get("sections", [])):
            sec["content"] = sec_content.get("content", "[Content not generated]")
            for sub, sub_content in zip(sec.get("subsections", []),
                                        sec_content.get("subsections", [])):
                sub["content"] = sub_content.get("content", "[Content not generated]")

        elapsed = time.time() - start_time
        page = render_page(structure, execution_time=elapsed)
        return HTMLResponse(content=page, status_code=200)

    except Exception as e:
        import traceback
        error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
        return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500)
|
|
@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
    """Dump every prompt/response pair logged for *topic* as plain text."""
    entries = RAW_LOG.get(topic, [])
    if not entries:
        return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)

    chunks = [
        f"--- Input [{n}] ---\n{prompt}\n\n--- AI response [{n}] ---\n{resp}\n"
        for n, (prompt, resp) in enumerate(entries, start=1)
    ]
    return PlainTextResponse("\n".join(chunks), status_code=200)