You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
2.6 KiB
95 lines
2.6 KiB
|
|
from modelconfig import Modelconfig
|
|
|
|
|
|
|
|
def get_model() -> Modelconfig:
    """Return the model configuration selected for this project.

    The commented-out entries below are benchmark records gathered on the
    same machine. For each candidate model they note: generated tokens,
    wall-clock time, generation speed, VRAM used by the bulk weight load
    (`vram_bulk`), additional VRAM peak during inference (`vram_top`) and
    the model's context window. Exactly one ``model = Modelconfig(...)``
    line should be active at a time; switch models by moving the comment.

    Returns:
        Modelconfig: the currently selected model configuration
        (unsloth/Qwen2.5-7B-Instruct-bnb-4bit, pre-quantized 4-bit).
    """
    # model:     NousResearch/Hermes-3-Llama-3.2-3B
    # tokens:    315 tk
    # time:      94.360 s
    # speed:     3.338 tk/s
    # vram_bulk: 3622 MB
    # vram_top:  80 MB
    # context:   131072 tk
    # model = Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True)

    # model:     unsloth/Llama-3.2-1B
    # tokens:    589 tk
    # time:      39.348 s
    # speed:     14.969 tk/s
    # vram_bulk: 4708 MB
    # vram_top:  102 MB
    # context:   131072 tk
    # model = Modelconfig("unsloth/Llama-3.2-1B")  # note, fast, but talks to itself. basically does not work.

    # model:     unsloth/Llama-3.2-3B-Instruct
    # tokens:    285 tk
    # time:      75.363 s
    # speed:     3.782 tk/s
    # vram_bulk: 3512 MB
    # vram_top:  48 MB
    # context:   131072 tk
    # model = Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True)

    # model:     unsloth/llama-3-8b-bnb-4bit
    # tokens:    435 tk
    # time:      84.314 s
    # speed:     5.159 tk/s
    # vram_bulk: 5440 MB
    # vram_top:  216 MB
    # context:   8192 tk
    # model = Modelconfig("unsloth/llama-3-8b-bnb-4bit")

    # Model size: 3.21B params
    # vram used:  xxxxx MB
    # speed:      xxxxx t/s
    # working:    DOES NOT LOAD
    # model = Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True)

    # model:     unsloth/gemma-2-9b-it-bnb-4bit
    # tokens:    154 tk
    # time:      32.727 s
    # speed:     4.706 tk/s
    # vram_bulk: 6156 MB
    # vram_top:  232 MB
    # context:   8192 tk
    # model = Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit")

    # model:     unsloth/Qwen2.5-7B-Instruct-bnb-4bit
    # tokens:    120 tk
    # time:      12.248 s
    # speed:     9.798 tk/s
    # vram_bulk: 5382 MB
    # vram_top:  170 MB
    # context:   32768 tk
    model = Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit")  # note, this works really good

    # model:     unsloth/Qwen2.5-3B-Instruct
    # tokens:    112 tk
    # time:      12.703 s
    # speed:     8.816 tk/s
    # vram_bulk: 2108 MB
    # vram_top:  98 MB
    # context:   32768 tk
    # model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True)

    # model:     unsloth/Qwen2.5-3B-Instruct
    # tokens:    118 tk
    # time:      33.748 s
    # speed:     3.497 tk/s
    # vram_bulk: 3310 MB
    # vram_top:  60 MB
    # context:   32768 tk
    # model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True)

    # Model size: 3.87B params
    # vram used:  xxxxx MB
    # speed:      xxxxx t/s
    # error: requires the protobuf library but it was not found in your environment
    # model = Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit")

    return model
|
|
|
|
|
|
|
|
|