tool calls with hermes-3

2024-12-31 17:09:36 +01:00
parent 126f4a3fad
commit 823f13ab51


@@ -14,7 +14,7 @@ model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftob
 # "meta-llama/Llama-2-7b-hf" # Replace with your chosen model
-quantization_config_4bit = BitsAndBytesConfig(
+quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4", # Recommended for better performance
     bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
@@ -30,7 +30,7 @@ model = AutoModelForCausalLM.from_pretrained(
-    # device_map="cuda", # Automatically places parts of the model on GPU/CPU
+    device_map="cuda", # Automatically places parts of the model on GPU/CPU
     # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
-    quantization_config=quantization_config_4bit
+    quantization_config=quantization_config_8bit
 )
 # Load tokenizer
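The new line references `quantization_config_8bit`, which is not defined anywhere in this diff. A minimal sketch of what that definition could look like, assuming the plain bitsandbytes 8-bit path (the name is taken from the diff; the exact options are a guess):

```python
from transformers import BitsAndBytesConfig

# Assumed definition of the 8-bit config referenced above; not shown in this diff.
quantization_config_8bit = BitsAndBytesConfig(
    load_in_8bit=True,  # 8-bit weights via bitsandbytes, roughly halves VRAM vs fp16
)
```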
@@ -52,23 +52,23 @@ print("max_context_length is %d tokens." % (max_context_length))
 # Generate text
-schema = """
-{
-    "properties": {
-        "program": {
-            "description": "Python program to be executed. The Message response to the input query is the output of this program",
-            "title": "Program",
-            "type": "string"
-        },
+# schema = """
+# {
+#     "properties": {
+#         "program": {
+#             "description": "Python program to be executed. The Message response to the input query is the output of this program",
+#             "title": "Program",
+#             "type": "string"
+#         },
-    },
-    "required": [
-        "program"
-    ],
-    "title": "Response",
-    "type": "object"
-}
-"""
+#     },
+#     "required": [
+#         "program"
+#     ],
+#     "title": "Response",
+#     "type": "object"
+# }
+# """
 # """
@@ -79,10 +79,10 @@ schema = """
 # }
 # """
-tool_assist = """
-You are a Python-assisted AI model. You may call the interpreter one or more times to assist with the user query. You may tell the user that you obtained ground truth with the help of Python or a calculator if asked. The user cannot see whether Python has been used, so do not expose or share failed attempts or syntax errors.
-To invoke this function, the answer may start and end with <python_tool_call> and </python_tool_call> respectively. The rest must be a valid Python script; additional text is not allowed before or after. Calling Python is not needed when just providing example code.
-"""
+# tool_assist = """
+# You are a Python-assisted AI model. You may call the interpreter one or more times to assist with the user query. You may tell the user that you obtained ground truth with the help of Python or a calculator if asked. The user cannot see whether Python has been used, so do not expose or share failed attempts or syntax errors.
+# To invoke this function, the answer may start and end with <python_tool_call> and </python_tool_call> respectively. The rest must be a valid Python script; additional text is not allowed before or after. Calling Python is not needed when just providing example code.
+# """
 messages = [
     # {"role": "system", "content": "Hold a casual conversation with the user. Keep responses short at max 3 sentences."},
@@ -187,6 +187,7 @@ def append_generate_chat(input_text: str, role="user"):
     inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", tokenize=True, return_dict=True, add_generation_prompt=True, tools=tool_functions) #continue_final_message=True,
     inputs = {key: value.to(model.device) for key, value in inputs.items()}
     # inputs = {key: value.to("cpu") for key, value in inputs.items()}
+    # inputs["input_ids"] = inputs["input_ids"][:, 1:]
     with torch.inference_mode():
         outputs, out_text = generate_incremental(inputs)
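`apply_chat_template(..., tools=tool_functions)` uses the transformers tool-use templating, which accepts plain Python functions and derives each tool's schema from its type hints and docstring. `tool_functions` is not shown in this diff; a sketch of what it might contain:

```python
def calculator(expression: str) -> str:
    """Evaluate an arithmetic expression and return the result as text.

    Args:
        expression: The expression to evaluate, e.g. "2 * (3 + 4)".
    """
    return str(eval(expression))  # demo only: eval is unsafe on untrusted input

# Assumed contents; the real list is defined elsewhere in the script.
tool_functions = [calculator]
```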