MathResearchIA / wiki_scraper.py
AdelMessaoudi-13's picture
🚀 Math Research AI
1ba1ba2
import wikipediaapi
import os
import requests
# Gemini credential, read once from the environment at import time
# (None when the API_KEY_GEMINI variable is not set).
API_KEY = os.environ.get("API_KEY_GEMINI")

# generateContent endpoint for the gemini-2.0-flash model; the key is appended
# as the "key" query parameter.
GEMINI_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-2.0-flash:generateContent?key={}".format(API_KEY)
)
def generate_summary(text, *, max_chars=4000, timeout=30):
    """Generate a simplified summary of *text* using the Gemini Flash API.

    Network and API failures are not raised; they are reported as a string
    starting with the warning emoji so the caller can display the result
    directly.

    Args:
        text: Article text to summarize.
        max_chars: Only the first ``max_chars`` characters of ``text`` are
            sent to the model.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The summary text returned by the model, or a warning message string
        when the input is empty, the API key is missing, the request fails,
        or the response does not contain any generated text.
    """
    # Nothing to summarize: avoid spending an API call on an empty prompt.
    if not text or not text.strip():
        return "⚠️ Error: No text to summarize."
    if not API_KEY:
        return "⚠️ Error: Missing API key. Please configure API_KEY_GEMINI."

    # Send the key in a header and drop the "?key=..." query from the URL:
    # requests includes the request URL in its exception messages, and those
    # messages are returned to the user below.
    endpoint = GEMINI_URL.split("?", 1)[0]
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": API_KEY,
    }

    excerpt = text[:max_chars]
    prompt = (
        f"Here is an article extracted from Wikipedia:\n\n{excerpt}\n\n"
        "Generate a simplified summary that is easy to understand even for someone without an advanced background in mathematics. "
        "Use an educational tone and rephrase complex concepts in simple terms. "
        "The summary should be concise (5 to 7 sentences) and focus on the essential points."
    )
    data = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.post(endpoint, headers=headers, json=data, timeout=timeout)
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return f"⚠️ Gemini API Error: {e}"

    try:
        return payload["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        # Missing/empty "candidates" or any other unexpected payload shape.
        return "⚠️ Gemini API Error: Unexpected response. No text was returned."
def get_wikipedia_article(topic):
    """Fetch the English Wikipedia article for *topic* and summarize it with Gemini.

    Args:
        topic: Title of the Wikipedia page to look up. Surrounding whitespace
            is ignored.

    Returns:
        The string returned by ``generate_summary`` for the article text,
        followed by a Markdown line linking to the Wikipedia page. A warning
        message string is returned instead when the topic is empty, when no
        page exists for it, or when the Wikipedia request fails.
    """
    # Reject empty input before touching the network.
    cleaned_topic = (topic or "").strip()
    if not cleaned_topic:
        return "⚠️ Please enter a topic to search for."

    user_agent = "MathResearchAI/1.0 (https://huggingface.co/spaces/AdelMessaoudi-13/MathResearchIA)"
    wiki = wikipediaapi.Wikipedia(language="en", user_agent=user_agent)
    page = wiki.page(cleaned_topic)

    try:
        # NOTE(review): exists(), text and fullurl are assumed to be the calls
        # that may hit the network, and wikipediaapi is assumed to surface
        # requests exceptions; any other exception type propagates unchanged.
        if not page.exists():
            return f"⚠️ No article found for '{cleaned_topic}'."
        raw_text = page.text  # full article text
        wikipedia_url = page.fullurl  # link shown as the source
    except requests.RequestException as e:
        # Report failures as a warning string, like generate_summary does.
        return f"⚠️ Wikipedia Error: {e}"

    summary = generate_summary(raw_text)
    return f"{summary}\n\n🔗 **Wikipedia Source**: {wikipedia_url}"