The Space shows a "Runtime error" status; the full app code is below:
```python
import os

import torch
import gradio as gr
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from ratelimit import limits, sleep_and_retry

from llama_index import (
    GPTVectorStoreIndex,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.llm_predictor import LLMPredictor
from langchain.llms import HuggingFacePipeline

# Configure device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def create_llm_pipeline():
    # NOTE: deepseek-ai/DeepSeek-R1 is a very large mixture-of-experts model;
    # loading it via AutoModelForCausalLM needs far more memory than a typical
    # Space provides, which is a likely cause of the runtime error. A smaller
    # checkpoint (e.g. a DeepSeek-R1-Distill variant) is safer to start with.
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1",
        trust_remote_code=True,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")

    # Wrap model + tokenizer in a text-generation pipeline. Do not pass a
    # `device=` argument here: the model is already dispatched by accelerate
    # via device_map="auto", and transformers rejects the combination.
    pipe = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=2048,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    return HuggingFacePipeline(pipeline=pipe)
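
# A quick sanity check for the wrapper before wiring it into llama_index
# (commented out so the Space does not run it on import; the prompt is
# just an example of ours):
# llm = create_llm_pipeline()
# print(llm("Summarize Aristotle's four causes in one sentence."))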

# Rate limit for query processing: RATE_LIMIT calls per minute, applied to
# data_querying below (the original defined the constant but never used it)
RATE_LIMIT = 3
def create_service_context():
    # Constraint parameters
    max_input_size = 4096
    num_outputs = 2048  # Adjusted for the DeepSeek model
    chunk_size_limit = 600

    # Create prompt helper (chunk_overlap_ratio supersedes the old unused
    # max_chunk_overlap value)
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=chunk_size_limit,
    )

    # Create LLM predictor backed by the DeepSeek pipeline
    llm_predictor = LLMPredictor(llm=create_llm_pipeline())

    # Create service context. NOTE: unless embed_model is set explicitly,
    # the defaults fall back to OpenAI embeddings, which require an
    # OPENAI_API_KEY secret on the Space.
    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )
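
# Loading DeepSeek-R1 is expensive, so build the service context once per
# process instead of on every ingestion/query. get_service_context is our
# own memoizing helper, not a llama_index API:
_SERVICE_CONTEXT = None

def get_service_context():
    global _SERVICE_CONTEXT
    if _SERVICE_CONTEXT is None:
        _SERVICE_CONTEXT = create_service_context()
    return _SERVICE_CONTEXT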
def data_ingestion_indexing(directory_path):
    # Load documents from the target directory
    documents = SimpleDirectoryReader(directory_path).load_data()

    # Build the vector index
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=get_service_context(),
    )

    # Persist the index to ./storage (the default, made explicit here so it
    # visibly matches the load path in data_querying)
    index.storage_context.persist(persist_dir="./storage")
    return index
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=60)  # the 60 s period is our assumption
def data_querying(input_text):
    # Load the stored index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(
        storage_context,
        service_context=get_service_context(),
    )

    # Query the index
    response = index.as_query_engine().query(input_text)
    return response.response
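
# Direct call for a quick test outside Gradio (commented out; assumes the
# index has already been built and persisted):
# print(data_querying("What does Aristotle mean by eudaimonia?"))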
# Create Gradio interface (gr.Textbox takes no `style` keyword, so the
# output box height is controlled with `lines` instead)
iface = gr.Interface(
    fn=data_querying,
    inputs=gr.Textbox(lines=20, label="Enter your question"),
    outputs=gr.Textbox(lines=25, label="Response"),
    title="Philosophy QA - Aristotle Complete Works (Using DeepSeek-R1)",
)
# Initialize the system
if __name__ == "__main__":
    # Build the index only if no persisted copy exists yet; otherwise reuse
    # ./storage and skip re-embedding on every restart
    if not os.path.exists("./storage"):
        data_ingestion_indexing("books")

    # Launch the interface
    iface.launch()
```
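
If the documents under `books/` change, the persisted index in `./storage` goes stale, because the code above only ingests when the directory is missing. One way to force a rebuild, reusing the functions above (`shutil.rmtree` is just one option for clearing the directory):

```python
import shutil

shutil.rmtree("./storage", ignore_errors=True)  # discard the stale index
data_ingestion_indexing("books")                # re-embed and persist
```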