different caching strategy

2025-01-04 15:47:43 +01:00
parent 18aec52501
commit 78b24d8f9f
2 changed files with 90 additions and 5 deletions


@@ -39,7 +39,7 @@ def append_generate_chat(input_text: str, role="user"):
     inputs = inference.tokenize(messages, tokenize=True)
-    outputs, out_text = inference.generate_incremental(inputs)
+    outputs, out_text = inference.generate(inputs)
     # append result to message history
     messages.append({"role": "assistant", "content": out_text})
@@ -141,14 +141,14 @@ def main():
         messages_temp = [summarize] + messages_temp + [summarize_user]  # copy dict in last instance
         # messages_temp[-1]["role"] = "user"
         input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
-        generated_tokens, full_output = inference.generate_incremental(input_ids)
+        generated_tokens, full_output = inference.generate(input_ids)
     elif input_text.startswith("/title"):
         messages_temp = list(filter(lambda x: x["role"] != "system", messages))
         messages_temp = [title_prompt] + messages_temp  # + [dict(title)]  # copy dict in last instance
         messages_temp[-1]["role"] = "user"
         input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
-        generated_tokens, full_output = inference.generate_incremental(input_ids)
+        generated_tokens, full_output = inference.generate(input_ids)
     elif input_text.startswith("/help"):
         print("!<prompt> answer as 'tool' in <tool_response> tags")