The Space shows a "Runtime error" status; the full app code is below:
```python
import os

import torch
import gradio as gr
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from ratelimit import limits, sleep_and_retry

from llama_index import (
    GPTVectorStoreIndex,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.llm_predictor import LLMPredictor
from langchain.llms import HuggingFacePipeline

# Configure device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def create_llm_pipeline():
    # NOTE: deepseek-ai/DeepSeek-R1 is a very large mixture-of-experts model;
    # loading it via AutoModelForCausalLM needs far more memory than a typical
    # Space provides, which is a likely cause of the runtime error. A smaller
    # checkpoint (e.g. a DeepSeek-R1-Distill variant) is safer to start with.
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1",
        trust_remote_code=True,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")

    # Wrap model + tokenizer in a text-generation pipeline. Do not pass a
    # `device=` argument here: the model is already dispatched by accelerate
    # via device_map="auto", and transformers rejects the combination.
    pipe = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=2048,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    return HuggingFacePipeline(pipeline=pipe)
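
# A quick sanity check for the wrapper before wiring it into llama_index
# (commented out so the Space does not run it on import; the prompt is
# just an example of ours):
# llm = create_llm_pipeline()
# print(llm("Summarize Aristotle's four causes in one sentence."))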

# Rate limit for query processing: RATE_LIMIT calls per minute, applied to
# data_querying below (the original defined the constant but never used it)
RATE_LIMIT = 3
def create_service_context():
    # Constraint parameters
    max_input_size = 4096
    num_outputs = 2048  # Adjusted for the DeepSeek model
    chunk_size_limit = 600

    # Create prompt helper (chunk_overlap_ratio supersedes the old unused
    # max_chunk_overlap value)
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=chunk_size_limit,
    )

    # Create LLM predictor backed by the DeepSeek pipeline
    llm_predictor = LLMPredictor(llm=create_llm_pipeline())

    # Create service context. NOTE: unless embed_model is set explicitly,
    # the defaults fall back to OpenAI embeddings, which require an
    # OPENAI_API_KEY secret on the Space.
    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )
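
# Loading DeepSeek-R1 is expensive, so build the service context once per
# process instead of on every ingestion/query. get_service_context is our
# own memoizing helper, not a llama_index API:
_SERVICE_CONTEXT = None

def get_service_context():
    global _SERVICE_CONTEXT
    if _SERVICE_CONTEXT is None:
        _SERVICE_CONTEXT = create_service_context()
    return _SERVICE_CONTEXT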
def data_ingestion_indexing(directory_path):
    # Load documents from the target directory
    documents = SimpleDirectoryReader(directory_path).load_data()

    # Build the vector index
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=get_service_context(),
    )

    # Persist the index to ./storage (the default, made explicit here so it
    # visibly matches the load path in data_querying)
    index.storage_context.persist(persist_dir="./storage")
    return index
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=60)  # the 60 s period is our assumption
def data_querying(input_text):
    # Load the stored index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(
        storage_context,
        service_context=get_service_context(),
    )

    # Query the index
    response = index.as_query_engine().query(input_text)
    return response.response
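
# Direct call for a quick test outside Gradio (commented out; assumes the
# index has already been built and persisted):
# print(data_querying("What does Aristotle mean by eudaimonia?"))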
# Create Gradio interface (gr.Textbox takes no `style` keyword, so the
# output box height is controlled with `lines` instead)
iface = gr.Interface(
    fn=data_querying,
    inputs=gr.Textbox(lines=20, label="Enter your question"),
    outputs=gr.Textbox(lines=25, label="Response"),
    title="Philosophy QA - Aristotle Complete Works (Using DeepSeek-R1)",
)
# Initialize the system
if __name__ == "__main__":
    # Build the index only if no persisted copy exists yet; otherwise reuse
    # ./storage and skip re-embedding on every restart
    if not os.path.exists("./storage"):
        data_ingestion_indexing("books")

    # Launch the interface
    iface.launch()
```
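
If the documents under `books/` change, the persisted index in `./storage` goes stale, because the code above only ingests when the directory is missing. One way to force a rebuild, reusing the functions above (`shutil.rmtree` is just one option for clearing the directory):

```python
import shutil

shutil.rmtree("./storage", ignore_errors=True)  # discard the stale index
data_ingestion_indexing("books")                # re-embed and persist
```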