# backend/tools/langextract_tool.py
# Author: KevanSoon — first project init (commit f147852)
import os
import langextract as lx
import textwrap
from dotenv import load_dotenv
# Step 1: Load environment variables from a .env file so the Gemini API key
# is available without hard-coding secrets in source.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Fail fast with a clear message: assigning None into os.environ would
# otherwise raise a cryptic "TypeError: str expected, not NoneType".
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to your environment or .env file."
    )
# langextract reads its API key from the LANGEXTRACT_API_KEY variable.
os.environ["LANGEXTRACT_API_KEY"] = GEMINI_API_KEY
# 1. Instruction block telling the model what to extract and how to format it.
#    textwrap.dedent strips the common leading indentation from the literal.
prompt = textwrap.dedent(
    """\
    Extract characters, emotions, and relationships in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context."""
)
# 2. A single worked example (Romeo and Juliet) showing the model the desired
#    extraction classes and the attribute style expected for each.
#    (class, exact text span, attributes) rows for the example document:
_example_rows = [
    ("character", "ROMEO", {"emotional_state": "wonder"}),
    ("emotion", "But soft!", {"feeling": "gentle awe"}),
    ("relationship", "Juliet is the sun", {"type": "metaphor"}),
]
examples = [
    lx.data.ExampleData(
        text="ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.",
        extractions=[
            lx.data.Extraction(
                extraction_class=cls,
                extraction_text=txt,
                attributes=attrs,
            )
            for cls, txt, attrs in _example_rows
        ],
    )
]
# The sentence to be annotated by the model.
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Gemini model used for the extraction run.
_MODEL_ID = "gemini-2.5-flash"

# Run the extraction, guided by the prompt and the worked example above.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id=_MODEL_ID,
)
# Save the annotated results to a JSONL file for later inspection or reuse.
lx.io.save_annotated_documents(
    [result], output_name="extraction_results.jsonl", output_dir="."
)
# Generate the interactive visualization from the saved file.
html_content = lx.visualize("extraction_results.jsonl")
with open("visualization.html", "w", encoding="utf-8") as f:
    # lx.visualize may return a plain string or (in notebook contexts) an
    # HTML object whose markup lives in a `.data` attribute — handle both,
    # otherwise the object's repr would be written instead of the HTML.
    if hasattr(html_content, "data"):
        f.write(html_content.data)
    else:
        f.write(html_content)