"""LLM loading and initialization."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline
import streamlit as st

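# Cache the loaded model as a Streamlit resource so the weights are
# loaded only once per server process, not on every script rerun.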
@st.cache_resource
def load_qwen_llm(model_name, max_new_tokens=256, temperature=0.7, top_p=0.95):
""" |
|
|
Load Qwen LLM model. |
|
|
|
|
|
Args: |
|
|
model_name: HuggingFace model identifier |
|
|
max_new_tokens: Maximum tokens to generate |
|
|
temperature: Sampling temperature |
|
|
top_p: Nucleus sampling parameter |
|
|
|
|
|
Returns: |
|
|
HuggingFacePipeline: Wrapped LLM for LangChain |
|
|
""" |
|
|
|
|
|
|
|
|
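    # Qwen checkpoints ship custom code on the Hub, so trust_remote_code
    # must be enabled for both the tokenizer and the model.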
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )

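    # Load in half precision with automatic device placement when a GPU
    # is available; fall back to full precision on CPU.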
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True
    )

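    # return_full_text=False strips the prompt from the generated text,
    # so downstream LangChain chains receive only the completion.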
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        return_full_text=False
    )

    return HuggingFacePipeline(pipeline=pipe)
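
# Example usage, a minimal sketch: the returned HuggingFacePipeline is a
# LangChain Runnable, so invoke() takes a prompt string and returns the
# generated text. The model id below is an assumption; substitute whichever
# Qwen checkpoint your app actually uses.
#
#     llm = load_qwen_llm("Qwen/Qwen2-1.5B-Instruct")
#     print(llm.invoke("Explain nucleus sampling in one sentence."))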