You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
950 B
20 lines
950 B
|
|
from transformers import BitsAndBytesConfig
|
|
import torch
|
|
|
|
class Modelconfig:
    """Pair a model name with an optional bitsandbytes quantization config.

    Instance attributes:
        model_name: The ``model_name`` argument, stored unchanged.
        bits_and_bytes_config: Either a ``BitsAndBytesConfig`` built here from
            the ``load_in_4bit`` / ``load_in_8bit`` flags, or the
            ``bits_and_bytes_config`` argument passed through as-is (which may
            be ``None``).
    """

    def __init__(self, model_name, bits_and_bytes_config=None, load_in_8bit=False, load_in_4bit=False):
        """Store the model name and resolve the quantization config.

        Args:
            model_name: Identifier of the model; stored without validation.
            bits_and_bytes_config: Ready-made quantization config. Must be
                left as ``None`` when either ``load_in_*`` flag is set.
            load_in_8bit: When truthy (and ``load_in_4bit`` is not), build a
                ``BitsAndBytesConfig(load_in_8bit=True)``.
            load_in_4bit: When truthy, build a 4-bit ``BitsAndBytesConfig``
                (nf4, double quantization, bfloat16 compute). Takes
                precedence over ``load_in_8bit`` if both are set.

        Raises:
            ValueError: If ``bits_and_bytes_config`` is given together with
                ``load_in_4bit`` or ``load_in_8bit``.
        """
        self.model_name = model_name

        # Validate with an explicit raise rather than `assert`: asserts are
        # removed under `python -O`, which would let a caller-supplied config
        # be silently replaced by the one built below.
        if (load_in_4bit or load_in_8bit) and bits_and_bytes_config is not None:
            raise ValueError(
                "bits_and_bytes_config must be None when load_in_4bit or "
                "load_in_8bit is set"
            )

        if load_in_4bit:
            self.bits_and_bytes_config = BitsAndBytesConfig(  # tool calls don't really work in 4 bit mode
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",  # Recommended for better performance
                bnb_4bit_use_double_quant=True,  # Optional: Further quantization for more memory saving
                bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for computation
            )
        elif load_in_8bit:
            self.bits_and_bytes_config = BitsAndBytesConfig(load_in_8bit=True)
        else:
            # No flag set: use whatever the caller supplied (possibly None).
            self.bits_and_bytes_config = bits_and_bytes_config
|