Spaces:
Runtime error
Runtime error
| import nest_asyncio | |
| import os | |
| from dotenv import load_dotenv | |
| from jinja2 import Template | |
| from pydantic import BaseModel, Field | |
| from pymongo.mongo_client import MongoClient | |
| from llama_index.program.openai import OpenAIPydanticProgram | |
| from llama_index.core.extractors import PydanticProgramExtractor | |
| from llama_index.llms.openai import OpenAI | |
| from core.prompt import ADD_METADATA_TEMPLATE | |
| from core.summarization.summarizer import SummarizeGenerator | |
# Module-level setup, executed once at import time.
# NOTE(review): nest_asyncio.apply() presumably patches asyncio so an already
# running event loop can be re-entered (e.g. notebooks / hosted apps) - confirm.
nest_asyncio.apply()
# Load environment variables from a .env file; extract_topic() below reads
# MONGO_URI from the environment.
load_dotenv()
class NodeMetadata(BaseModel):
    """Metadata for nodes, capturing topic and subtopic from the book."""

    # NOTE(review): this model is passed as ``output_cls`` to the OpenAI
    # pydantic program, so the docstring above and the field descriptions
    # below presumably end up in the schema shown to the model. Their text is
    # therefore kept verbatim; only the layout of the literals differs.

    # Required: the broad theme the node belongs to.
    topic: str = Field(
        default=...,
        description=(
            "The main subject or category that the node is associated with, "
            "representing a broad theme within the book."
        ),
    )

    # Required: the narrower section underneath ``topic``.
    subtopic: str = Field(
        default=...,
        description=(
            "A more specific aspect or section under the main topic, "
            "refining the context of the node within the book."
        ),
    )
def extract_topic(references, content_table):
    """Store the extracted content table in MongoDB and build a topic extractor.

    The function:

    1. Connects to MongoDB using the ``MONGO_URI`` environment variable and
       pings the deployment (a failed ping is printed, not raised).
    2. Runs ``SummarizeGenerator(references).extract_content_table`` on
       ``content_table`` and inserts the resulting dictionary, together with
       ``references["title"]``, into ``summarizer.topic_collection``.
    3. Renders ``ADD_METADATA_TEMPLATE`` with the extractor output and uses
       it to configure an ``OpenAIPydanticProgram`` (output class
       ``NodeMetadata``, model ``gpt-4o-mini``) wrapped in a
       ``PydanticProgramExtractor``.

    Args:
        references: Mapping handed to ``SummarizeGenerator``; it must contain
            a ``"title"`` key, which is stored alongside the extracted data.
        content_table: Passed unchanged to ``extract_content_table``.
            NOTE(review): its expected type is not visible here - confirm
            against ``SummarizeGenerator``.

    Returns:
        The configured ``PydanticProgramExtractor``.

    Raises:
        KeyError: If ``references`` has no ``"title"`` entry.
    """
    uri = os.getenv("MONGO_URI")

    # The client is only needed up to the insert, so scope it with a context
    # manager: it is closed (and its connection pool released) on success and
    # on error alike, instead of being leaked on every call.
    with MongoClient(uri) as client:
        try:
            client.admin.command("ping")
            print("Pinged your deployment. You successfully connected to MongoDB!")
        except Exception as e:
            # Best-effort connectivity check: report the problem and carry on;
            # a genuinely unreachable server will surface at insert_one below.
            print(e)

        # Database "summarizer", collection "topic_collection".
        collection = client["summarizer"]["topic_collection"]

        generate_content_table = SummarizeGenerator(references)
        extractor_output, extractor_dics = generate_content_table.extract_content_table(
            content_table
        )
        print(extractor_output)

        data_to_insert = {
            "title": references["title"],
            **extractor_dics,  # Unpack the extractor_dics dictionary
        }
        collection.insert_one(data_to_insert)

    # Template.render() already returns a str, so no extra str() is needed.
    add_metadata_template = Template(ADD_METADATA_TEMPLATE).render(
        extractor_output=extractor_output
    )
    print("add metadata template : ", add_metadata_template)

    llm = OpenAI(temperature=0.1, model="gpt-4o-mini")

    openai_program = OpenAIPydanticProgram.from_defaults(
        output_cls=NodeMetadata,
        prompt_template_str="{input}",
        extract_template_str=add_metadata_template,
        llm=llm,
    )

    topic_extractor = PydanticProgramExtractor(
        program=openai_program,
        input_key="input",
        show_progress=True,
        extract_template_str=add_metadata_template,
        llm=llm,
    )

    return topic_extractor