Spaces:
Sleeping
Sleeping
def extract_wiki_id(wiki_url):
    """
    Return the Wikipedia page identifier embedded in *wiki_url*.

    The identifier is everything after the "wiki/" path segment, up to
    (but not including) any query string ("?") or fragment ("#").
    Returns None when the URL has no "wiki/" segment.
    """
    import re

    found = re.search(r'wiki/([^#?]+)', wiki_url)
    # Guard clause instead of a conditional expression.
    if found is None:
        return None
    return found.group(1)
def get_wiki_details(wiki_id):
    """
    Placeholder that returns canned Wikipedia details for *wiki_id*.

    Returns:
        dict: keys ``pageid``, ``title``, ``summary``, ``wiki_xml`` and
        ``sections`` (a mapping of section name -> section text).
    """
    # TODO: replace with a real Wikipedia API / backend-service call;
    # the argument is currently ignored and fixed dummy data is returned.
    section_texts = {
        "Introduction": "AI Introduction content...",
        "History": "AI History content...",
        "Applications": "AI Applications content...",
    }
    details = {
        "pageid": 123456,
        "title": "Artificial Intelligence",
        "summary": "AI is the simulation of human intelligence in machines.",
        "wiki_xml": "<xml>...</xml>",
        "sections": section_texts,
    }
    return details
def init_llm_client(api_key, base_url="https://api.openai.com/v1"):
    """
    Configure the ``openai`` module and return it for use as the LLM client.

    Args:
        api_key: API key used to authenticate requests.
        base_url: API endpoint; defaults to the public OpenAI endpoint.

    Returns:
        The configured ``openai`` module.

    NOTE(review): this uses the module-level configuration style
    (``openai.api_key`` / ``openai.api_base``) of the pre-1.0 ``openai``
    package — confirm the installed version still supports it.
    """
    import openai

    # The two assignments are independent; order is irrelevant.
    openai.api_base = base_url
    openai.api_key = api_key
    return openai
def split_content_into_sections(wiki_xml, content_format="Plain Text"):
    """
    Split the Wikipedia content into logical sections.

    Each direct child element of the XML root is treated as one section;
    the element's tag is the section name.

    Args:
        wiki_xml (str): The XML content of the Wikipedia article.
        content_format (str): The format to return the content in
            ("Plain Text" or "XML").

    Returns:
        dict: A dictionary mapping section names to their content. In
        "XML" mode the value is the section's serialized XML; otherwise
        it is the concatenated text found inside the section, stripped.

    Raises:
        xml.etree.ElementTree.ParseError: If *wiki_xml* is not well-formed.
    """
    from xml.etree import ElementTree as ET

    root = ET.fromstring(wiki_xml)
    sections = {}
    for child in root:
        if content_format == "XML":
            sections[child.tag] = ET.tostring(child, encoding='unicode')
        else:  # Plain Text
            # itertext() yields exactly the text fragments *inside* this
            # element, in document order. The previous manual walk had three
            # bugs: it appended the section element's own tail (text that
            # belongs to the parent, outside the section), it injected a
            # stray " " before every fragment, and its broad
            # `except Exception` wrote "Error extracting text: ..." strings
            # into the returned data instead of surfacing the failure.
            sections[child.tag] = "".join(child.itertext()).strip()
    return sections
def get_translate_prompt(article_title, artice_summary, original_content, target_lang):
    """
    Build the translation prompt string sent to the LLM.

    Args:
        article_title: Title of the article being translated.
        artice_summary: Summary of the article.
        original_content: The content to translate.
        target_lang: Language to translate into.

    NOTE(review): ``artice_summary`` is a typo for ``article_summary``;
    kept as-is so keyword-argument callers are not broken.
    """
    # Assembled line-by-line; joining with "\n" reproduces the exact
    # leading and trailing newlines of the original triple-quoted template.
    prompt_lines = (
        "",
        f"You are a professional translator. Translate the following content to {target_lang}.",
        f"Title: {article_title}",
        f"Summary: {artice_summary}",
        f"Content: {original_content}",
        "",
    )
    return "\n".join(prompt_lines)