first tests
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
/model/*
|
||||||
|
*.prof
|
38
llama.py
Normal file
38
llama.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""Smoke test: load a causal language model on the CPU and time one generation.

The script loads the model and tokenizer named by ``model_name``, prints how
long loading took, generates a continuation for a fixed prompt, prints the
decoded text (special tokens included) and prints how long generation took.
"""
import time

from transformers import AutoModelForCausalLM, AutoTokenizer

t_start = time.perf_counter()  # monotonic clock: safe for measuring elapsed time

# Hugging Face model id to load. Other ids tried earlier:
#   "NousResearch/Llama-2-7b-hf", "meta-llama/Llama-2-7b-hf"
# On the author's machine downloads are cached in
# C:\Users\ftobler\.cache\huggingface\hub
model_name = "NousResearch/Hermes-3-Llama-3.2-3B"

# Single place to choose where the model and its inputs live.
# Use "cuda" for a GPU; the model alone also accepts device_map="auto".
device = "cpu"

# Load the model weights onto `device`. No quantization is requested here
# (the former `load_in_8bit=False` only restated the default).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
)

# Load the tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

print(f"load took {time.perf_counter() - t_start:.3f}s")
t_start = time.perf_counter()

# Tokenize the prompt and move the tensors to the same device as the model.
input_text = "Hello, who are you?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Some Llama-family tokenizers define no pad token; fall back to EOS so that
# generate() receives an explicit padding id.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# `**inputs` forwards the attention mask together with the input ids.
# No length limit is passed, so the model's generation config decides when to
# stop; add e.g. `max_new_tokens=200` to get longer answers.
outputs = model.generate(
    **inputs,
    pad_token_id=pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

# Decode and print the result, keeping special tokens visible for debugging.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

print(f"generation took {time.perf_counter() - t_start:.3f}s")
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
bitsandbytes
|
Reference in New Issue
Block a user