Compare commits

...

12 Commits

SHA1 Message Date
5e3747179f UI prototype 2025-01-15 23:39:33 +01:00
44e5bd423e start cases 2025-01-15 23:39:09 +01:00
03c93f4d8b force encoding 2025-01-15 23:38:50 +01:00
f9c4d3e2db add webserver 2025-01-15 23:38:39 +01:00
7224111a0b python package restructuring 2025-01-14 20:29:29 +01:00
0c022d4731 tuned prompt 2025-01-13 23:33:51 +01:00
a697f49698 whitespace 2025-01-13 23:33:22 +01:00
3218e7eb63 whitespace 2025-01-13 23:32:54 +01:00
ef789375c8 improved append file 2025-01-13 23:32:42 +01:00
7f0cb49156 refactoring 2025-01-13 22:39:32 +01:00
19870cdea8 try out some more models 2025-01-13 20:47:48 +01:00
677eb6d0ea switch for toolcalls 2025-01-12 21:16:42 +01:00
41 changed files with 4847 additions and 267 deletions

3
.gitignore vendored

@@ -1,4 +1,5 @@
 /model/*
 *.prof
 __pycache__
 *.venv
+*.egg-info

2
.vscode/launch.json vendored

@@ -15,7 +15,7 @@
"name": "PyDebug: __main__.py", "name": "PyDebug: __main__.py",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "__main__.py", "program": "chatbug/__main__.py",
"console": "integratedTerminal" "console": "integratedTerminal"
} }
] ]

0
chatbug/__init__.py Normal file

chatbug/__main__.py

@@ -1,6 +1,7 @@
print("running __main__.-py") print("running __main__.-py")
from llama import main from chatbug.llama import main_func
if __name__ == "__main__": if __name__ == "__main__":
main() main_func()

37
chatbug/download_model.py Normal file

@@ -0,0 +1,37 @@
from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig
def main():
# Model size: 3.21B params
Inference(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))
# Model size: 1.24B params
Inference(Modelconfig("unsloth/Llama-3.2-1B", load_in_8bit=True))
# Model size: 3.21B params
Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))
# Model size: 4.65B params
Inference(Modelconfig("unsloth/llama-3-8b-bnb-4bit", load_in_4bit=True))
# Model size: 3.21B params
Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_4bit=True))
# Model size: 5.21B params
Inference(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit", load_in_4bit=True))
# Model size: 4.46B params
Inference(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit", load_in_4bit=True))
# Model size: 3.09B params
Inference(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))
# Model size: 3.87B params
Inference(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit", load_in_4bit=True))
if __name__ == "__main__":
main()

46
chatbug/file_append.py Normal file

@@ -0,0 +1,46 @@
import os
def check_append_file(prompt: str) -> str:
if "@" in prompt:
parts = prompt.split(" ")
content = []
for part in parts:
if part.startswith("@"):
filename = part[1:]
try:
if os.path.exists(filename):
with open(filename, "r", encoding="utf-8") as f:
content.append("%s:'''\n%s'''" % (filename, f.read()))
except FileNotFoundError:
print(f"File '{filename}' not found.")
except Exception as e:
print("exception encountered %s", e)
content.append(prompt)
return "\n".join(content)
return prompt
if __name__ == "__main__":
exit() # not accidentally trigger it
# Create some sample files
with open("fmain.py", "w") as f:
f.write("# This is main.py\n")
with open("finference.py", "w") as f:
f.write("# This is inference.py\n")
# Test cases
test_prompts = [
"@fmain.py",
"@fmain.py @finference.py",
"@fnonexistent.py",
"@fmain.py @fnonexistent.py"
]
for prompt in test_prompts:
print(f"Testing prompt: {prompt}")
result = check_append_file(prompt)
print(f"Result: {result}")
print("-" * 20)

170
chatbug/generation_loop.py Normal file

@@ -0,0 +1,170 @@
import time
import json
import random
from chatbug.tool_helper import tool_list, parse_and_execute_tool_call
from chatbug.inference import Inference, torch_reseed
from chatbug.file_append import check_append_file
def msg(role: str, content: str) -> dict:
return {"role": role, "content": content}
class Terminal:
def __init__(self, inference: Inference, systemmessage: dict):
self.inference = inference
self.messages:list[dict] = [systemmessage]
# these are meant to be overwritten by better ones
self.roleflip = msg("system", "keep going.")
self.summarize = msg("system", "summarize conversation")
self.summarize_user = msg("system", "please summarize conversation")
self.title_prompt = msg("system", "create a title for this conversation")
def append_generate_chat(self, input_text: str, role="user"):
t_start = time.time()
# generate AI response
if input_text != None:
self.messages.append({"role": role, "content": input_text})
inputs = self.inference.tokenize(self.messages, tokenize=True)
number_of_input_tokens = inputs.shape[1]
outputs, out_text = self.inference.generate(inputs)
# append result to message history
self.messages.append({"role": "assistant", "content": out_text})
print("")
time_taken = time.time() - t_start
number_of_tokens = len(outputs[0])
tokens_per_second = (number_of_tokens - number_of_input_tokens) / time_taken
print("generation took %.3fs (%d tokens, %.3f t/s)" % (time_taken, number_of_tokens, tokens_per_second))
# handle tool call and check if a tool call has happened.
tool_result = parse_and_execute_tool_call(out_text, tool_list)
if tool_result != None:
# tool call happened
tool_result = "<tool_response>%s</tool_response>" % tool_result
# depending on the chat template the tool response tags must or must not be passed. :(
self.append_generate_chat(tool_result, role="tool")
def join(self):
while True:
# print an input prompt to receive text or commands
input_text = input(">>> ")
print("")
input_text = check_append_file(input_text)
if input_text.startswith("!"):
self.append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
# append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(
elif input_text.startswith("/clear"):
print("clearing chat history")
start_msg = self.messages[0]
self.message = [start_msg]
print("")
elif input_text.startswith("/history"):
history = self.inference.tokenize(self.messages, tokenize=False)
# history = tokenizer.apply_chat_template(self.message, return_tensors="pt", tokenize=False, add_generation_prompt=False)
print(history)
elif input_text.startswith("/undo"):
if len(self.messages) > 2:
print("undo latest prompt")
self.message = self.messages[:-2]
else:
print("cannot undo because there are not enough self.message on history.")
print("")
elif input_text.startswith("/regen"):
if len(self.messages) >= 2:
print("regenerating message (not working)")
self.messages = self.messages[:-1]
seed = random.randint(0, 2**32 - 1) # Generate a random seed
torch_reseed(seed)
self.append_generate_chat(None)
else:
print("cannot regenerate because there are not enough self.message on history.")
print("")
elif input_text.startswith("/more"):
self.append_generate_chat(None)
elif input_text.startswith("/file"):
filename = input_text[len("/file "):]
print("read '%s' for prompt:" % filename)
with open(filename, "r") as f:
content = f.read()
print(content)
self.append_generate_chat(content)
elif input_text.startswith("/auto"):
message_backup = self.messages
self.messages = [self.roleflip]
for m in self.message_backup:
role = m["role"]
content = m["content"]
if role == "user":
role = "assistant"
elif role == "assistant":
role = "user"
if role != "system":
self.message.append({"role": role, "content": content})
self.append_generate_chat(None) # will automatically advance the conversation as 'user'
last_message = self.messages[-1]
last_message["role"] = "user"
self.messages = message_backup + [last_message]
self.append_generate_chat(None) # 'regular' chatbot answer
elif input_text.startswith("/summarize"):
messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
messages_temp = [self.summarize] + messages_temp + [self.summarize_user] # copy dict in last instance
# messages_temp[-1]["role"] = "user"
input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
generated_tokens, full_output = self.inference.generate(input_ids)
elif input_text.startswith("/title"):
messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
messages_temp = [self.title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
messages_temp[-1]["role"] = "user"
input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
generated_tokens, full_output = self.inference.generate(input_ids)
elif input_text.startswith("/save"):
with open("messages.json", "w") as f:
json.dump(self.messages, f, indent=4)
elif input_text.startswith("/load"):
with open("messages.json", "r") as f:
new_messages = json.load(f)
self.messages = [self.messages[0]] + new_messages[1:]
elif input_text.startswith("/help"):
print("!<prompt> answer as 'tool' in <tool_response> tags")
print("/clear clear chat history")
print("/undo undo latest prompt")
print("/regen regenerate the last message")
print("/more generate more additional information")
print("/file read prompt input from file")
print("/auto automatically advance conversation")
print("/summarize generate a summary of the chat")
print("/title generate a title of the chat")
print("/save write chat history to file")
print("/load load previously saved history")
print("/help print this message")
print("")
elif input_text.startswith("/"):
print("unknown command.")
else:
self.append_generate_chat(input_text)

chatbug/inference.py

@@ -14,44 +14,52 @@ from transformers.cache_utils import (
 )
 import torch
 import time
-import utils
 import re
 import os
+import chatbug.utils as utils
+from chatbug.modelconfig import Modelconfig
 torch.set_num_threads(os.cpu_count()) # Adjust this to the number of threads/cores you have
 class Inference:
-    def __init__(self):
-        print("loading LLM...")
+    def __init__(self, modelconfig: Modelconfig):
+        print("loading LLM '%s'..." % modelconfig.model_name)
         t_start = time.time()
         # model_name = "NousResearch/Llama-2-7b-hf" # will cache on C:\Users\ftobler\.cache\huggingface\hub
-        model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
+        # model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
+        # model_name = "unsloth/phi-4-unsloth-bnb-4bit" #too big
         # model_name = "gpt2"
         # model_name = "NousResearch/Hermes-2-Pro-Llama-3-8B"
         # model_name = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
         # "meta-llama/Llama-2-7b-hf" # Replace with your chosen model
-        quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
-            load_in_4bit=True,
-            bnb_4bit_quant_type="nf4", # Recommended for better performance
-            bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
-            bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
-        )
-        quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
+        # quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
+        #     load_in_4bit=True,
+        #     bnb_4bit_quant_type="nf4", # Recommended for better performance
+        #     bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
+        #     bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
+        # )
+        # quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
         # Load the model with quantization (optional)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            # device_map="auto", # Automatically places parts of the model on GPU/CPU
-            # device_map="cuda", # Automatically places parts of the model on GPU/CPU
-            device_map="cuda", # Automatically places parts of the model on GPU/CPU
-            # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
-            quantization_config=quantization_config_8bit
-        )
+        if modelconfig.bits_and_bytes_config != None:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                modelconfig.model_name,
+                # device_map="auto", # Automatically places parts of the model on GPU/CPU
+                # device_map="cuda", # Automatically places parts of the model on GPU/CPU
+                device_map="cuda", # Automatically places parts of the model on GPU/CPU
+                # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
+                quantization_config=modelconfig.bits_and_bytes_config
+            )
+        else:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                modelconfig.model_name,
+                device_map="cuda",
+            )
         # print("apply optimization")
         # self.model.generation_config.cache_implementation = "static"
@@ -59,25 +67,25 @@ class Inference:
         # Load tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(modelconfig.model_name)
         print("load took %.3fs" % (time.time() - t_start))
-        max_context_length = self.model.config.max_position_embeddings
+        self.max_context_length = self.model.config.max_position_embeddings
         self.tokenizer.chat_template = utils.load_json_file("chat_template.json")
-        print("max_context_length is %d tokens." % (max_context_length))
+        print("max_context_length is %d tokens." % (self.max_context_length))
-    def generate(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate(self, input_ids: torch.Tensor, print_stdout=True) -> tuple[torch.Tensor, str]:
         with torch.inference_mode():
             with torch.no_grad():
-                return self.generate_incremental_2(input_ids)
+                return self.generate_incremental_2(input_ids, print_stdout)
-    def generate_batch(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_batch(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         outputs = self.model.generate(
             input_ids, # **inputs, inputs["input_ids"]
             max_new_tokens=500, # max_length=max_context_length,
@@ -90,11 +98,12 @@ class Inference:
         # skip all input tokens and only output the additional generated part of the conversation
         input_token_count = len(input_ids[0])
         out_text = self.tokenizer.decode(outputs[0][input_token_count:], skip_special_tokens=True)
-        print(out_text)
+        if print_stdout:
+            print(out_text)
         return outputs, out_text
-    def generate_incremental_2(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_incremental_2(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         generated_tokens = input_ids
         past_key_values = DynamicCache()
@@ -126,12 +135,14 @@ class Inference:
             # Decode and print the newly generated token (skip special tokens)
             # out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
             out_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
-            print(out_text, end="", flush=True) # Print without newline
+            if print_stdout:
+                print(out_text, end="", flush=True) # Print without newline
             # Check if the generated token is the end-of-sequence token
             # if next_token.item() == self.tokenizer.eos_token_id:
             if new_tokens[-1].item() == self.tokenizer.eos_token_id:
-                print("")
+                if print_stdout:
+                    print("")
                 break
             # n += 1
@@ -150,12 +161,12 @@ class Inference:
         return generated_tokens, full_output
-    def generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         with torch.inference_mode():
-            return self._generate_incremental(input_ids)
+            return self._generate_incremental(input_ids, print_stdout)
-    def _generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def _generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         # Start with the initial input tokens
         generated_tokens = input_ids # Initially, this is just the input tokens
@@ -169,7 +180,7 @@ class Inference:
         while True:
             # Call the model with the current tokens
             outputs = self.model(
                 input_ids=generated_tokens,
                 use_cache=True,
                 num_beams = 1
                 # past_key_values=past_key_values
@@ -183,11 +194,13 @@ class Inference:
             # Decode and print the newly generated token (skip special tokens)
             out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
-            print(out_text, end="", flush=True) # Print without newline
+            if print_stdout:
+                print(out_text, end="", flush=True) # Print without newline
             # Check if the generated token is the end-of-sequence token
             if next_token.item() == self.tokenizer.eos_token_id:
-                print("")
+                if print_stdout:
+                    print("")
                 break
             n += 1


@@ -0,0 +1,76 @@
import time
import nvidia_smi
import torch
import gc
from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig
def empty_cuda():
while True:
gc.collect()
torch.cuda.empty_cache()
time.sleep(0.5)
vram = nvidia_smi.get_gpu_stats()["memory_used"]
print("vram: %d MB" % vram)
if vram < 200:
return
def profile_ex(model_conf: Modelconfig):
print("")
empty_cuda()
messages = [
{"role": "system", "content": "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."},
{"role": "user", "content": "How do astronomers determine the original wavelength of light emitted by a celestial body at rest, which is necessary for measuring its speed using the Doppler effect?"},
]
gpu_stats_before = nvidia_smi.get_gpu_stats()
inference = Inference(model_conf)
gpu_stats_loaded = nvidia_smi.get_gpu_stats()
t_start = time.time()
input_ids = inference.tokenize(messages, tokenize=True)
generated_tokens, full_output = inference.generate_batch(input_ids, print_stdout=False)
t_end = time.time()
gpu_stats_after = nvidia_smi.get_gpu_stats()
took = t_end - t_start
tokens = len(generated_tokens[0])
tokens_per = tokens / took
vram_bulk = gpu_stats_loaded["memory_used"] - gpu_stats_before["memory_used"]
vram_top = gpu_stats_after["memory_used"] - gpu_stats_loaded["memory_used"]
print("model: %s" % model_conf.model_name)
print("tokens: %d tk" % tokens)
print("time: %.3f s" % took)
print("speed: %.3f tk/s" % tokens_per)
print("vram_bulk: %d MB" % vram_bulk)
print("vram_top: %d MB" % vram_top)
print("context: %d tk" % inference.max_context_length)
print("")
def profile(model_conf):
try:
profile_ex(model_conf)
except Exception as e:
print("exception: " + str(e))
pass
def main():
profile(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))
profile(Modelconfig("unsloth/Llama-3.2-1B"))
profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))
profile(Modelconfig("unsloth/llama-3-8b-bnb-4bit"))
# profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True))
profile(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit"))
profile(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit"))
profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))
profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True))
profile(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit"))
if __name__ == "__main__":
main()

46
chatbug/llama.py Normal file

@@ -0,0 +1,46 @@
import datetime
from chatbug.tool_helper import tool_list
from chatbug.tool_functions import register_dummy
from chatbug.inference import Inference
from chatbug.generation_loop import Terminal, msg
from chatbug import model_selection
register_dummy()
def initialize_config(inference: Inference) -> Terminal:
# systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
system_prompt = "Hold a casual conversation with the user. Keep responses short at max 5 sentences and on point. Answer using markdown to the user. When providing code examples, avoid comments which provide no additional information. Do not summarize."
current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
append_toolcalls = False
if append_toolcalls:
systemmessage = msg("system", system_prompt + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list))
else:
systemmessage = msg("system", system_prompt + "\n" + current_date_and_time)
terminal = Terminal(inference, systemmessage)
# system message for role flip so the model automatically answers for the user
terminal.roleflip = msg("system", "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye.")
# system messages and user message to bring the model to summarize the entire conversation
terminal.summarize = msg("system", "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly.")
terminal.summarize_user = msg("system", "Can you summarize the conversation?")
# system message to create a conversation title
terminal.title_prompt = msg("system", "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity.")
return terminal
def main_func():
inference = Inference(model_selection.get_model())
terminal = initialize_config(inference)
terminal.join()
if __name__ == "__main__":
main_func()

chatbug/matheval/__init__.py Normal file

@@ -0,0 +1,3 @@
from chatbug.matheval import ast
from chatbug.matheval import interpreter
from chatbug.matheval import lexer

chatbug/matheval/ast.py

@@ -1,6 +1,5 @@
-import math_lexer as lexer
-from math_lexer import Token
+from chatbug.matheval import lexer
+from chatbug.matheval.lexer import Token
 class Statement:

chatbug/matheval/interpreter.py

@@ -1,10 +1,11 @@
-import math_ast as ast
 from sympy.parsing.sympy_parser import parse_expr
 from sympy.core.numbers import Integer, One, Zero
 from sympy import symbols, Eq, solveset, linsolve, nonlinsolve
 from sympy.core.symbol import Symbol
+from chatbug.matheval import ast
 def interpret(statement: ast.Statement) -> str:

chatbug/model_selection.py Normal file

@@ -0,0 +1,95 @@
from chatbug.modelconfig import Modelconfig
def get_model() -> Modelconfig:
# model: NousResearch/Hermes-3-Llama-3.2-3B
# tokens: 315 tk
# time: 94.360 s
# speed: 3.338 tk/s
# vram_bulk: 3622 MB
# vram_top: 80 MB
# context: 131072 tk
# model = Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True)
# model: unsloth/Llama-3.2-1B
# tokens: 589 tk
# time: 39.348 s
# speed: 14.969 tk/s
# vram_bulk: 4708 MB
# vram_top: 102 MB
# context: 131072 tk
# model = Modelconfig("unsloth/Llama-3.2-1B") # note, fast, but talks to itself. basically does not work.
# model: unsloth/Llama-3.2-3B-Instruct
# tokens: 285 tk
# time: 75.363 s
# speed: 3.782 tk/s
# vram_bulk: 3512 MB
# vram_top: 48 MB
# context: 131072 tk
# model = Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True)
# model: unsloth/llama-3-8b-bnb-4bit
# tokens: 435 tk
# time: 84.314 s
# speed: 5.159 tk/s
# vram_bulk: 5440 MB
# vram_top: 216 MB
# context: 8192 tk
# model = Modelconfig("unsloth/llama-3-8b-bnb-4bit")
# Model size: 3.21B params
# vram used: xxxxx MB
# speed xxxxx t/s
# working: DOES NOT LOAD
# model = Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True)
# model: unsloth/gemma-2-9b-it-bnb-4bit
# tokens: 154 tk
# time: 32.727 s
# speed: 4.706 tk/s
# vram_bulk: 6156 MB
# vram_top: 232 MB
# context: 8192 tk
# model = Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit")
# model: unsloth/Qwen2.5-7B-Instruct-bnb-4bit
# tokens: 120 tk
# time: 12.248 s
# speed: 9.798 tk/s
# vram_bulk: 5382 MB
# vram_top: 170 MB
# context: 32768 tk
model = Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit") # note, this works really good
# model: unsloth/Qwen2.5-3B-Instruct
# tokens: 112 tk
# time: 12.703 s
# speed: 8.816 tk/s
# vram_bulk: 2108 MB
# vram_top: 98 MB
# context: 32768 tk
# model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True)
# model: unsloth/Qwen2.5-3B-Instruct
# tokens: 118 tk
# time: 33.748 s
# speed: 3.497 tk/s
# vram_bulk: 3310 MB
# vram_top: 60 MB
# context: 32768 tk
# model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True)
# Model size: 3.87B params
# vram used: xxxxx MB
# speed xxxxx t/s
# error: requires the protobuf library but it was not found in your environment
# model = Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit")
return model

20
chatbug/modelconfig.py Normal file

@@ -0,0 +1,20 @@
from transformers import BitsAndBytesConfig
import torch
class Modelconfig:
def __init__(self, model_name, bits_and_bytes_config=None, load_in_8bit=False, load_in_4bit=False):
self.model_name = model_name
if load_in_4bit:
assert bits_and_bytes_config == None
self.bits_and_bytes_config = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
load_in_4bit=True,
bnb_4bit_quant_type="nf4", # Recommended for better performance
bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
)
elif load_in_8bit:
assert bits_and_bytes_config == None
self.bits_and_bytes_config = BitsAndBytesConfig(load_in_8bit=True)
else:
self.bits_and_bytes_config = bits_and_bytes_config

chatbug/tool_functions.py

@@ -1,10 +1,8 @@
 import random
 import datetime
-from tool_helper import tool
-import math_lexer
-import math_ast
-import math_interpreter
-import utils
+from chatbug.tool_helper import tool
+import chatbug.matheval as matheval
+import chatbug.utils as utils
 # @tool
@@ -39,10 +37,10 @@ def math_evaluate(expression: str):
     Args:
         expression: A valid arithmetic expression (e.g., '2 + 3 * 4'). The expression must not contain '='."""
     try:
-        tokens = math_lexer.tokenize(expression)
-        parser = math_ast.Parser()
+        tokens = matheval.lexer.tokenize(expression)
+        parser = matheval.ast.Parser()
         ast = parser.parse(tokens)
-        return math_interpreter.interpret(ast)
+        return matheval.interpreter.interpret(ast)
     except Exception as e:
         utils.print_error("Tool call evaluation failed. - " + str(e))
         return "Tool call evaluation failed."
@@ -58,10 +56,10 @@ Args:
         expression = "solve " + " and ".join(equations) + " for " + " and ".join(variables)
         print(expression)
-        tokens = math_lexer.tokenize(expression)
-        parser = math_ast.Parser()
+        tokens = matheval.lexer.tokenize(expression)
+        parser = ast.Parser()
         ast = parser.parse(tokens)
-        return math_interpreter.interpret(ast)
+        return matheval.interpreter.interpret(ast)
     except Exception as e:
         utils.print_error("Tool call evaluation failed. - " + str(e))
         return "Tool call evaluation failed."

chatbug/tool_helper.py

@@ -2,7 +2,7 @@
 from typing import Callable, List, Optional
 import json
 import re
-import utils
+import chatbug.utils as utils
 tool_list = []

0
chatbug/ui/__init__.py Normal file

20
chatbug/ui/__main__.py Normal file

@@ -0,0 +1,20 @@
from .server import start_server
from .serverwait import wait_for_server
from .ui import start_ui, _start_sandboxed
def start_ui():
svr = start_server(start_thread=False)
url = f"http://localhost:{svr.port}"
# wait_for_server(url)
# # start_ui(threaded=False)
# import webview
# w = webview.create_window('asdf', '../../web/index.html', min_size=(1200, 900), zoomable=True)
# webview.start(ssl=True)
if __name__ == "__main__":
start_ui()

3771
chatbug/ui/bottle.py Normal file

File diff suppressed because it is too large

50
chatbug/ui/bottle_svr.py Normal file

@@ -0,0 +1,50 @@
#tornado needs this or it does not run
import asyncio
try:
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
except AttributeError:
print("Probably running on linux")
from bottle import route, run, response, static_file, request, post
from .file_watchdog import FileWatchdog
class BottleServer:
def __init__(self, listen="0.0.0.0", port=8080, start_thread=True, root="web"):
self.root = root
self.port = port
self.listen = listen
self.wdt = FileWatchdog(self.root)
if start_thread:
import threading
self.thread = threading.Thread(target=self._run, args=())
self.thread.name = "BottleServerThread"
self.thread.daemon = True
self.thread.start()
else:
self._run()
def _home(self):
return static_file("index.html", root= self.root)
def _watchdog(self):
return str(self.wdt.time)
def _files(self, name):
if name.endswith(".vue"):
return static_file(name, root= self.root, mimetype="text/html")
return static_file(name, root= self.root)
def _run(self):
route('/')(self._home)
route('/watchdog')(self._watchdog)
route('/<name:path>')(self._files)
print(f"Starting server at {self.listen}:{self.port}")
run(host=self.listen, port=self.port, debug=False, threaded=True, quiet=True)

chatbug/ui/file_watchdog.py Normal file

@@ -0,0 +1,47 @@
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class MyHandler(FileSystemEventHandler):
def __init__(self, function):
self.function = function
def on_any_event(self, _event):
# Handle the event (e.g., file created, modified, deleted)
self.function()
class FileWatchdog:
def __init__(self, path):
self.path = path
self.time = 0
event_handler = MyHandler(lambda: self.event_handler())
self.observer = Observer()
self.observer.schedule(event_handler, path, recursive=True)
self.observer.start()
def event_handler(self):
#print("change detected")
self.time = time.time()
def stop(self):
self.observer.stop()
if __name__ == "__main__":
wdt = FileWatchdog("./web")
try:
while True:
time.sleep(1)
print(wdt.time)
except KeyboardInterrupt:
wdt.stop()

10
chatbug/ui/server.py Normal file

@@ -0,0 +1,10 @@
from .bottle_svr import BottleServer
def start_server(start_thread=False):
print("server start")
return BottleServer(start_thread=start_thread, root="web")
if __name__ == "__main__":
start_server()

29
chatbug/ui/serverwait.py Normal file

@@ -0,0 +1,29 @@
import time
import requests
import socket
def wait_for_server(url, timeout=10, retry_interval=0.5):
"""
Waits for a web server to become available by polling its URL.
"""
start_time = time.monotonic()
while time.monotonic() - start_time < timeout:
try:
# First, try a simple TCP connection to check if the port is open
hostname, port = url.split("//")[1].split(":")
port = int(port)
with socket.create_connection((hostname, port), timeout=retry_interval):
pass # If the connection succeeds, continue to the HTTP check
# Then, make an HTTP request to ensure the server is responding correctly
response = requests.get(url, timeout=retry_interval)
response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
return # Server is up and responding correctly
except (requests.exceptions.RequestException, socket.error) as e:
print(f"Server not yet available: {e}. Retrying in {retry_interval} seconds...")
time.sleep(retry_interval)
raise TimeoutError(f"Server at {url} did not become available within {timeout} seconds.")

30
chatbug/ui/ui.py Normal file

@@ -0,0 +1,30 @@
import webview
from threading import Thread
def start_ui(threaded=False):
if threaded:
_start_threaded()
else:
_start_normal()
def _start_threaded():
t = Thread(target=start_ui, args=[False])
t.run()
def _start_normal():
webview.create_window('Geargenerator', 'http://localhost:8080', min_size=(1200, 900), zoomable=True)
webview.start()
def _start_sandboxed():
webview.create_window('Geargenerator', 'web_v2/geargenerator.html', min_size=(1200, 900), zoomable=True)
webview.start(ssl=True)
if __name__ == "__main__":
_start_sandboxed()
# start_ui(threaded=False)

175
llama.py

@@ -1,175 +0,0 @@
import time
import random
from tool_helper import tool_list, parse_and_execute_tool_call
from tool_functions import register_dummy
from inference import Inference, torch_reseed
import datetime
messages = []
inference = None
# systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
systemmessage = "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."
# system message for role flip so the model automatically answers for the user
roleflip = {"role": "system", "content": "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye."}
# system messages and user message to bring the model to summarize the entire conversation
summarize = {"role": "system", "content": "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly."}
summarize_user = {"role": "system", "content": "Can you summarize the conversation?"}
# system message to create a conversation title
title_prompt = {"role": "system", "content": "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity."}
register_dummy()
def append_generate_chat(input_text: str, role="user"):
t_start = time.time()
# generate AI response
if input_text != None:
messages.append({"role": role, "content": input_text})
inputs = inference.tokenize(messages, tokenize=True)
outputs, out_text = inference.generate(inputs)
# append result to message history
messages.append({"role": "assistant", "content": out_text})
print("")
print("generation took %.3fs (%d tokens)" % (time.time() - t_start, len(outputs[0])))
# handle tool call and check if a tool call has happened.
tool_result = parse_and_execute_tool_call(out_text, tool_list)
if tool_result != None:
# tool call happened
tool_result = "<tool_response>%s</tool_response>" % tool_result
# depending on the chat template the tool response tags must or must not be passed. :(
append_generate_chat(tool_result, role="tool")
def main():
global messages
global inference
inference = Inference()
current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
messages = [{"role": "system", "content": systemmessage + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list)}]
while True:
# print an input prompt to receive text or commands
input_text = input(">>> ")
print("")
if input_text.startswith("!"):
append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
# append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(
elif input_text.startswith("/clear"):
print("clearing chat history")
start_msg = messages[0]
messages = [start_msg]
print("")
elif input_text.startswith("/history"):
history = inference.tokenize(messages, tokenize=False)
# history = tokenizer.apply_chat_template(messages, return_tensors="pt", tokenize=False, add_generation_prompt=False)
print(history)
elif input_text.startswith("/undo"):
if len(messages) > 2:
print("undo latest prompt")
messages = messages[:-2]
else:
print("cannot undo because there are not enough messages on history.")
print("")
elif input_text.startswith("/regen"):
if len(messages) >= 2:
print("regenerating message (not working)")
messages = messages[:-1]
seed = random.randint(0, 2**32 - 1) # Generate a random seed
torch_reseed(seed)
append_generate_chat(None)
else:
print("cannot regenerate because there are not enough messages on history.")
print("")
elif input_text.startswith("/more"):
append_generate_chat(None)
elif input_text.startswith("/file"):
filename = input_text[len("/file "):]
print("read '%s' for prompt:" % filename)
with open(filename, "r") as f:
content = f.read()
print(content)
append_generate_chat(content)
elif input_text.startswith("/auto"):
messages_backup = messages
messages = [roleflip]
for m in messages_backup:
role = m["role"]
content = m["content"]
if role == "user":
role = "assistant"
elif role == "assistant":
role = "user"
if role != "system":
messages.append({"role": role, "content": content})
append_generate_chat(None) # will automatically advance the conversation as 'user'
last_message = messages[-1]
last_message["role"] = "user"
messages = messages_backup + [last_message]
append_generate_chat(None) # 'regular' chatbot answer
elif input_text.startswith("/summarize"):
messages_temp = list(filter(lambda x: x["role"] != "system", messages))
messages_temp = [summarize] + messages_temp + [summarize_user] # copy dict in last instance
# messages_temp[-1]["role"] = "user"
input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
generated_tokens, full_output = inference.generate(input_ids)
elif input_text.startswith("/title"):
messages_temp = list(filter(lambda x: x["role"] != "system", messages))
messages_temp = [title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
messages_temp[-1]["role"] = "user"
input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
generated_tokens, full_output = inference.generate(input_ids)
elif input_text.startswith("/help"):
print("!<prompt> answer as 'tool' in <tool_response> tags")
print("/clear clear chat history")
print("/undo undo latest prompt")
print("/regen regenerate the last message")
print("/more generate more additional information")
print("/file read prompt input from file")
print("/auto automatically advance conversation")
print("/summarize generate a summary of the chat")
print("/title generate a title of the chat")
print("/help print this message")
print("")
elif input_text.startswith("/"):
print("unknown command.")
else:
append_generate_chat(input_text)
if __name__ == "__main__":
main()

requirements.txt

@@ -1,3 +1,4 @@
 transformers
 accelerate
 bitsandbytes
+pytest

28
setup.py Normal file

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages
setup(
name='chatbug',
version='0.1.0',
description='A conversational AI chatbot',
author='Florin Tobler',
author_email='florin.tobler@hotmail.com',
packages=find_packages(exclude=["tests"]),
install_requires=[
'transformers',
'accelerate',
'bitsandbytes',
'pytest',
'pywebview',
],
entry_points={
'console_scripts': [
'chatbug=chatbug.llama:main_func',
# a^ b^ c^ d^
# a => the command line argument
# b => the package name
# c => the file name in the package (same as imports)
# d => the function to call
'chatbugui=chatbug.ui.__main__:start_ui',
],
},
)


@@ -1 +0,0 @@
-# empty


@@ -1,32 +1,20 @@
 import pytest
-import tests.helper as helper
+from tests import helper
 inference = None
-InferenceClass = None
 Tensor = None
 def prepare():
-    if InferenceClass == None:
-        test_import_inference_module_librarys()
-    if inference == None:
-        test_instantiate_inference_instance()
-def test_import_inference_module_librarys():
-    import inference
-    import torch
-    global InferenceClass
-    global Tensor
-    InferenceClass = inference.Inference
-    Tensor = torch.Tensor
-def test_instantiate_inference_instance():
-    if InferenceClass == None:
-        test_import_inference_module_librarys()
     global inference
-    inference = InferenceClass()
+    global Tensor
+    if inference == None:
+        from torch import Tensor as _Tensor
+        from chatbug.inference import Inference
+        from chatbug.model_selection import get_model
+        inference = Inference(get_model())
+        Tensor = _Tensor
 def test_tool_header_generation():


@@ -1,6 +1,6 @@
 import pytest
-import tool_helper
-import tests.helper as helper
+import chatbug.tool_helper as tool_helper
+from tests import helper


@@ -1,6 +1,6 @@
 import pytest
-import tool_functions
+import chatbug.tool_functions as tool_functions
+from tests import helper
 def test_math_evaluate_1():
@@ -28,6 +28,13 @@ def test_math_evaluate_5():
     result = tool_functions.math_evaluate("sin(pi/2) + cos(0)")
     assert result == "sin(pi/2) + cos(0) = 2"
+def test_math_evaluate_solve_a():
+    result = tool_functions.math_evaluate("solve 240=x*r+x*r^2+x*r^3+s and r=1.618 and s=5 for x, r, s")
+    assert result == "Solved equation system 240 = r**3*x + r**2*x + r*x + s, r = 1.61800000000000 and s = 5 for x=27.7393327937747=~27.739, r=1.61800000000000=~1.618 and s=5.00000000000000=~5.000."
+def test_math_evaluate_solve_b():
+    result = tool_functions.math_evaluate("solve 250=x+x*r+s and r=1.618 and s=0 for x, r, s")
+    assert result == "Solved equation system 250 = r*x + s + x, r = 1.61800000000000 and s = 0 for x=95.4927425515661=~95.493, r=1.61800000000000=~1.618 and s=0."
@@ -54,4 +61,3 @@ def test_math_solver_3b():
 def test_math_solver_4():
     result = tool_functions.math_evaluate("solve 2*x**3 + 3*y = 7 and x - y = 1 for x, y")
     assert result == "Solved equation system 2*x**3 + 3*y = 7 and x - y = 1 for x=~1.421 and y=~0.421."


@@ -1,7 +1,8 @@
 import pytest
-import tool_helper
+from chatbug import tool_helper
 from unittest import mock
-import tests.helper as helper
+from tests import helper
+import re
@@ -40,34 +41,34 @@ def test_match_and_extract_matching3_with_newline():
 def test_string_malformed_faulty():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_str("{json_content}", [])
         assert result == None
         print_error_mock.assert_called_once() # this will check if the mocked function on the context was called.
 def test_tool_call_json_1():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": 1, "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == "result_1_zwei"
         assert print_error_mock.call_count == 0
 def test_tool_call_json_2():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy2", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == "SOME_TEXT"
         assert print_error_mock.call_count == 0
 def test_tool_call_json_non_existing_call_check():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy_which_is_not_existing", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == None
         assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.
 def test_tool_call_json_wrong_arguments_check():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": "must_be_an_int_but_is_string", "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == None
         assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.
@@ -75,7 +76,6 @@ def test_tool_call_json_wrong_arguments_check():
 def test_regex_multiline():
-    import re
     pattern = r"<start>(.*)</end>"
     # The text to search (spanning multiple lines)

61
web/index.html Normal file

@@ -0,0 +1,61 @@
<!DOCTYPE html>
<html lang="en">
<head>
<!-- <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script> -->
<link rel="stylesheet" href="stylesheet.css">
<script src="alpine.min.js"></script>
<script src="main.js"></script>
<script src="watchdog.js"></script>
</head>
<body>
<div class="sidebar">
<h1>Chatbug 🪲</h1>
<div class="button">🐛 New Chat</div>
<div class="title">Today</div>
<div class="button">Building Web UI with Bottle & Alpine.js</div>
<div class="button">Coding in python</div>
<div class="title">Last Week</div>
<div class="title">Older</div>
</div>
<div class="mainarea">
<!-- <h1 x-data="{ message: 'I ❤️ Alpine' }" x-text="message"></h1> -->
<div class="message">
<div class="bubble">Hello world</div>
</div>
<div class="response">
<div class="">Hello! Nice to meet you. What's up?</div>
</div>
<div class="message">
<div class="bubble">ah, just holding an example conversation with you</div>
</div>
<div class="response">
<div class="">Got it! Fun stuff. What kind of projects are you working on these days?</div>
</div>
<div class="message">
<div class="bubble">LLM chatbot named chatbug 🪲</div>
</div>
<div class="response">
<div class="">Cool name! Chatbug sounds like a friendly one. How's it going?</div>
</div>
<div class="message">
<div class="bubble">making a web ui with bottle and alpinejs</div>
</div>
<div class="input">
<!-- toolbutton for tool submenu, normally hidden unless pressed -->
<div class="button">+</div>
<div class="tool list" style="display:none">
<div class="tool button">attach file</div>
<div class="tool button">regenerate</div>
<div class="tool button">undo</div>
</div>
<input type="text">
<!-- send -->
<div class="button"></div>
</div>
</div>
</body>
</html>

25
web/main.js Normal file

@@ -0,0 +1,25 @@
// import {createApp, ref, reactive} from 'vue';
// const app = createApp({
// data() {
// let msg = ref("hello world")
// try {
// msg.value = "" + pywebview.api
// } catch (e) {
// msg.value = "did not invoke " + e
// }
// window.msg = msg
// return {
// message: msg
// };
// }
// });
// app.mount('#app');

117
web/stylesheet.css Normal file

@@ -0,0 +1,117 @@
body {
background-color: black;
color: white;
font-family: Arial, Helvetica, sans-serif;
margin: 0px;
height: 100%;
}
.sidebar {
width: 250px;
background-color: #2a262a;
float: left;
height: 100%;
position: absolute;
}
.sidebar h1 {
margin: 20px;
}
.sidebar .title {
font-size: 8pt;
margin: 20px;
margin-top: 30px;
margin-bottom: 10px;
}
.sidebar .button {
margin-left: 10px;
margin-right: 10px;
padding: 10px;
border-radius: 10px;
}
.sidebar .button:hover {
background-color: #423a42;
}
.mainarea {
margin-left: 260px;
height: 100%;
position: absolute;
right: 0;
left: 0;
}
.message {
display: flex;
margin-left: 40px;
margin-right: 10px;
}
.bubble {
padding: 10px;
border-radius: 10px;
background-color: #416146;
margin-left: auto;
float: right;
position: relative;
}
.response {
display: flex;
margin: 30px;
position: relative;
}
.response::before {
content: '🪲';
position: absolute;
top: -4px;
left: -30px;
}
.input {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #2a262a;
border-radius: 10px;
width: 70%;
margin: auto;
position: absolute;
bottom: 40px;
}
.tool.list {
display: none;
background-color: #fff;
border: 1px solid #ccc;
position: absolute;
top: 100%;
left: 0;
z-index: 1;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
.tool.button {
cursor: pointer;
padding: 5px 10px;
margin: 5px;
}
.input input {
flex-grow: 1;
padding: 10px;
border: 0px solid #ccc;
background: none;
color: white;
}
.input input:focus {
outline: 0px solid black; /* Custom focus outline */
}

67
web/watchdog.js Normal file

@@ -0,0 +1,67 @@
wdt = {
last_wdt_time: 0,
watchdog_counter: 0
}
pollFileChange = () => {
setTimeout(() => {
wdt.watchdog_counter++
console.log(wdt.watchdog_counter)
if (wdt.watchdog_counter > 20) {
return
}
ajax({
type: "GET",
url: "/watchdog",
success: (data) => {
var time = Number(data)
if (wdt.last_wdt_time == 0) {
wdt.last_wdt_time = time
pollFileChange()
} else if (time > wdt.last_wdt_time) {
location.reload();
} else {
pollFileChange()
}
},
})
}, 10000)
}
function ajax(setting) {
if (typeof(shutdown) !== 'undefined') return
var request = new XMLHttpRequest();
request.open(setting.type, setting.url, true);
request.setRequestHeader('Content-Type', setting.dataType)
request.onload = function(data) {
if (typeof(shutdown) !== 'undefined') return
if (this.status >= 200 && this.status < 400) {
if (setting.success) {
setting.success(this.response)
}
} else {
if (setting.error) {
setting.error(this.response)
}
}
}
request.onerror = function(data) {
if (typeof(shutdown) !== 'undefined') return
if (setting.error) {
setting.error(data)
}
}
if (setting.data) {
request.send(setting.data)
} else {
request.send()
}
}
pollFileChange()