| | |
| | |
| | from abc import ABC, abstractmethod |
| | from typing import List, Tuple, Dict, Any |
| |
|
| | from llama_cpp import Llama |
| |
|
| |
|
| | from langfuse.decorators import observe, langfuse_context |
| | import os |
| |
|
| | |
# --- Langfuse observability setup ----------------------------------------
# NOTE(security): credentials are committed in source control. Move these
# to deployment secrets / environment configuration and rotate the keys.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-04d2302a-aa5c-4870-9703-58ab64c3bcae"
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-d34ea200-feec-428e-a621-784fce93a5af"
os.environ["LANGFUSE_HOST"] = "https://chris4k-langfuse-template-space.hf.space"

# Initialize the Langfuse client, degrading to offline mode when the client
# cannot be created. Bug fix: the original referenced `Langfuse` without
# importing it (only `observe`/`langfuse_context` are imported above), so
# this always raised NameError and printed "Langfuse Offline"; it also left
# `langfuse` unbound for later readers.
langfuse = None
try:
    from langfuse import Langfuse
    langfuse = Langfuse()
except Exception:
    print("Langfuse Offline")
| |
|
| |
|
class GenerationStrategy(ABC):
    """Abstract interface for text-generation strategies.

    Concrete subclasses implement :meth:`generate` to turn a prompt into a
    completion using the model/tokenizer owned by a ``BaseGenerator``.
    """

    @abstractmethod
    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], **kwargs) -> str:
        """Produce a completion for ``prompt`` using ``generator``'s model."""
        ...
| |
|
| |
|
class DefaultStrategy(GenerationStrategy):
    """Plain single-pass generation: encode, generate once, decode."""

    @observe()
    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], **kwargs) -> str:
        """Encode ``prompt``, run one generation pass, and decode the result."""
        encoded = generator.tokenizer(prompt, return_tensors="pt")
        prompt_ids = encoded.input_ids.to(generator.device)
        generated = generator.model.generate(prompt_ids, **model_kwargs)
        return generator.tokenizer.decode(generated[0], skip_special_tokens=True)
| |
|
| |
|
class MajorityVotingStrategy(GenerationStrategy):
    """Self-consistency: sample several completions and return the most common."""

    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
        """Sample ``num_samples`` completions and return the majority answer.

        Args:
            generator: Owner of the ``tokenizer``/``model``/``device`` used here.
            prompt: Text prompt to complete.
            model_kwargs: Extra keyword arguments forwarded to ``model.generate``.
            num_samples: Number of independent samples to draw.

        Returns:
            The most frequently produced decoded completion; ties are broken
            deterministically in favor of the earliest-sampled answer.
        """
        from collections import Counter  # local import: keeps top-of-file imports untouched

        # Fix: the prompt encoding is loop-invariant; the original re-tokenized
        # the same prompt on every iteration.
        input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)
        outputs: List[str] = []
        for _ in range(num_samples):
            output = generator.model.generate(input_ids, **model_kwargs)
            outputs.append(generator.tokenizer.decode(output[0], skip_special_tokens=True))
        # Counter.most_common breaks ties by first occurrence, unlike the
        # original max(set(...)) whose tie-break depended on set ordering.
        return Counter(outputs).most_common(1)[0][0]
| |
|
| |
|
class BestOfN(GenerationStrategy):
    """Best-of-N sampling: draw N candidates and return the PRM-preferred one."""

    @observe()
    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
        """Sample ``num_samples`` completions and return the highest-scoring one.

        Args:
            generator: Owner of ``tokenizer``/``model``/``device`` and the
                ``prm_model`` reward model used for scoring.
            prompt: Text prompt to complete.
            model_kwargs: Extra keyword arguments forwarded to ``model.generate``.
            num_samples: Number of candidate completions to draw.
            **kwargs: May supply ``system_message`` for the PRM prompt
                (defaults to the empty string).

        Returns:
            The decoded candidate with the highest PRM score.
        """
        system_message = kwargs.get("system_message", "")
        # Prompt encoding is loop-invariant; hoist it out of the sampling loop.
        input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)

        scored_outputs: List[Tuple[str, float]] = []
        for _ in range(num_samples):
            output = generator.model.generate(input_ids, **model_kwargs)
            response = generator.tokenizer.decode(output[0], skip_special_tokens=True)

            # Bug fix: the original passed the template as a plain literal, so
            # "{system_message}" and "{response}" were never substituted and
            # the PRM scored the same constant string for every candidate
            # ({system_message} was also an undefined name). Build the prompt
            # explicitly with an f-string instead.
            prm_prompt = (
                f"<|system|>\n{system_message}</s>\n"
                f"<|user|>\n{response}</s>\n"
                f"<|assistant|>"
            )
            prm_output = generator.prm_model(
                prm_prompt,
                max_tokens=512,
                stop=["</s>"],
                echo=True
            )

            # NOTE(review): llama_cpp's Llama.__call__ returns a dict, which has
            # no `.logits` attribute, so this fallback yields 0.0 and Best-of-N
            # degenerates to "first sample wins" — confirm the intended scoring
            # source (e.g. logprobs) and fix the extraction accordingly.
            score = prm_output.logits.mean().item() if hasattr(prm_output, 'logits') else 0.0
            scored_outputs.append((response, score))

        return max(scored_outputs, key=lambda x: x[1])[0]
| |
|
| |
|
| |
|
| |
|
class BeamSearch(GenerationStrategy):
    """Beam-search decoding that returns the top ``num_samples`` sequences."""

    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> List[str]:
        """Run beam search and return all returned beams, decoded.

        Args:
            generator: Owner of the ``tokenizer``/``model``/``device`` used here.
            prompt: Text prompt to complete.
            model_kwargs: Extra keyword arguments forwarded to ``model.generate``.
            num_samples: Beam width; also the number of sequences returned.

        Returns:
            A list of ``num_samples`` decoded sequences. (Annotation fix: the
            original was annotated ``-> str`` but has always returned a list.)
        """
        input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)
        outputs = generator.model.generate(
            input_ids,
            num_beams=num_samples,
            num_return_sequences=num_samples,
            **model_kwargs
        )
        return [generator.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
| |
|
| |
|
class DVT(GenerationStrategy):
    """Depth-first tree search: sample a frontier, extend the best candidates,
    and return the highest PRM-scored response found."""

    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, breadth: int = 3, depth: int = 2, **kwargs) -> str:
        """Explore a tree of completions and return the best-scoring one.

        Args:
            generator: Owner of ``tokenizer``/``model``/``device`` and the
                ``prm_model`` scorer.
            prompt: Root text prompt.
            model_kwargs: Extra keyword arguments forwarded to ``model.generate``.
            num_samples: Kept for interface parity with the other strategies.
            breadth: Candidates sampled at the root and kept per level.
                (Bug fix: ``breadth``/``depth`` were previously undefined names,
                so every call raised ``NameError``; they are now parameters
                with defaults, which is backward-compatible.)
            depth: Number of search levels, including the root level.

        Returns:
            The response with the highest PRM score across all levels.
        """
        def _prm_score(text: str) -> float:
            # Mean PRM logit over the text; higher is better.
            encoded = generator.tokenizer(text, return_tensors="pt").to(generator.device)
            return generator.prm_model(**encoded).logits.mean().item()

        results: List[Tuple[str, float]] = []
        # Root prompt encoding is loop-invariant; hoist it out of the loop.
        input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)
        for _ in range(breadth):
            output = generator.model.generate(input_ids, **model_kwargs)
            response = generator.tokenizer.decode(output[0], skip_special_tokens=True)
            results.append((response, _prm_score(response)))

        for _ in range(depth - 1):
            # Keep only the current best candidates and extend each of them.
            best_responses = sorted(results, key=lambda x: x[1], reverse=True)[:breadth]
            for response, _ in best_responses:
                ids = generator.tokenizer(response, return_tensors="pt").input_ids.to(generator.device)
                output = generator.model.generate(ids, **model_kwargs)
                extended_response = generator.tokenizer.decode(output[0], skip_special_tokens=True)
                results.append((extended_response, _prm_score(extended_response)))

        return max(results, key=lambda x: x[1])[0]
| |
|
class COT(GenerationStrategy):
    """Chain-of-thought strategy (placeholder — no reasoning loop yet)."""

    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
        """Placeholder implementation; always reports that CoT is unavailable."""
        # TODO: implement chain-of-thought prompting and answer extraction.
        return "Not implemented yet"
| |
|
| |
|
class ReAct(GenerationStrategy):
    """ReAct (reason + act) strategy (placeholder — no tool loop yet)."""

    def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
        """Placeholder implementation; always reports that ReAct is unavailable."""
        # TODO: implement the thought/action/observation loop.
        return "Not implemented yet"
| |
|