Compare commits: adcb172da4 ... master
12 commits

SHA1:
5e3747179f
44e5bd423e
03c93f4d8b
f9c4d3e2db
7224111a0b
0c022d4731
a697f49698
3218e7eb63
ef789375c8
7f0cb49156
19870cdea8
677eb6d0ea

.gitignore (vendored), 3 changed lines
@@ -1,4 +1,5 @@
/model/*
*.prof
__pycache__
*.venv
*.venv
*.egg-info

.vscode/launch.json (vendored), 2 changed lines
@@ -15,7 +15,7 @@
            "name": "PyDebug: __main__.py",
            "type": "debugpy",
            "request": "launch",
            "program": "__main__.py",
            "program": "chatbug/__main__.py",
            "console": "integratedTerminal"
        }
    ]

chatbug/__init__.py, new file (0 lines)

@@ -1,6 +1,7 @@
print("running __main__.-py")

from llama import main
from chatbug.llama import main_func


if __name__ == "__main__":
    main()
    main_func()

chatbug/download_model.py, new file (37 lines)
@@ -0,0 +1,37 @@


from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig


def main():
    # Model size: 3.21B params
    Inference(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))

    # Model size: 1.24B params
    Inference(Modelconfig("unsloth/Llama-3.2-1B", load_in_8bit=True))

    # Model size: 3.21B params
    Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))

    # Model size: 4.65B params
    Inference(Modelconfig("unsloth/llama-3-8b-bnb-4bit", load_in_4bit=True))

    # Model size: 3.21B params
    Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_4bit=True))

    # Model size: 5.21B params
    Inference(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit", load_in_4bit=True))

    # Model size: 4.46B params
    Inference(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit", load_in_4bit=True))

    # Model size: 3.09B params
    Inference(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))

    # Model size: 3.87B params
    Inference(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit", load_in_4bit=True))


if __name__ == "__main__":
    main()

chatbug/file_append.py, new file (46 lines)
@@ -0,0 +1,46 @@
import os


def check_append_file(prompt: str) -> str:
    if "@" in prompt:
        parts = prompt.split(" ")
        content = []
        for part in parts:
            if part.startswith("@"):
                filename = part[1:]
                try:
                    if os.path.exists(filename):
                        with open(filename, "r", encoding="utf-8") as f:
                            content.append("%s:'''\n%s'''" % (filename, f.read()))
                except FileNotFoundError:
                    print(f"File '{filename}' not found.")
                except Exception as e:
                    print("exception encountered %s", e)
        content.append(prompt)
        return "\n".join(content)
    return prompt



if __name__ == "__main__":
    exit() # not accidentally trigger it

    # Create some sample files
    with open("fmain.py", "w") as f:
        f.write("# This is main.py\n")
    with open("finference.py", "w") as f:
        f.write("# This is inference.py\n")

    # Test cases
    test_prompts = [
        "@fmain.py",
        "@fmain.py @finference.py",
        "@fnonexistent.py",
        "@fmain.py @fnonexistent.py"
    ]

    for prompt in test_prompts:
        print(f"Testing prompt: {prompt}")
        result = check_append_file(prompt)
        print(f"Result: {result}")
        print("-" * 20)

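For reference, a small usage sketch of check_append_file (illustrative only; "notes.txt" is a placeholder filename, not part of this change set):

# Illustrative sketch, assuming a file "notes.txt" exists in the working directory.
from chatbug.file_append import check_append_file

expanded = check_append_file("@notes.txt what does this file say?")
# check_append_file() treats every "@"-prefixed token as a filename and prepends the
# file wrapped as  notes.txt:'''<contents>'''  before the original prompt text.
print(expanded)
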
chatbug/generation_loop.py, new file (170 lines)
@@ -0,0 +1,170 @@
import time
import json
import random
from chatbug.tool_helper import tool_list, parse_and_execute_tool_call
from chatbug.inference import Inference, torch_reseed
from chatbug.file_append import check_append_file



def msg(role: str, content: str) -> dict:
    return {"role": role, "content": content}


class Terminal:

    def __init__(self, inference: Inference, systemmessage: dict):
        self.inference = inference
        self.messages:list[dict] = [systemmessage]

        # these are meant to be overwritten by better ones
        self.roleflip = msg("system", "keep going.")
        self.summarize = msg("system", "summarize conversation")
        self.summarize_user = msg("system", "please summarize conversation")
        self.title_prompt = msg("system", "create a title for this conversation")

    def append_generate_chat(self, input_text: str, role="user"):
        t_start = time.time()

        # generate AI response
        if input_text != None:
            self.messages.append({"role": role, "content": input_text})

        inputs = self.inference.tokenize(self.messages, tokenize=True)
        number_of_input_tokens = inputs.shape[1]

        outputs, out_text = self.inference.generate(inputs)

        # append result to message history
        self.messages.append({"role": "assistant", "content": out_text})

        print("")
        time_taken = time.time() - t_start
        number_of_tokens = len(outputs[0])
        tokens_per_second = (number_of_tokens - number_of_input_tokens) / time_taken
        print("generation took %.3fs (%d tokens, %.3f t/s)" % (time_taken, number_of_tokens, tokens_per_second))

        # handle tool call and check if a tool call has happened.
        tool_result = parse_and_execute_tool_call(out_text, tool_list)
        if tool_result != None:
            # tool call happened
            tool_result = "<tool_response>%s</tool_response>" % tool_result
            # depending on the chat template the tool response tags must or must not be passed. :(
            self.append_generate_chat(tool_result, role="tool")

    def join(self):

        while True:
            # print an input prompt to receive text or commands
            input_text = input(">>> ")
            print("")

            input_text = check_append_file(input_text)


            if input_text.startswith("!"):
                self.append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
                # append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(

            elif input_text.startswith("/clear"):
                print("clearing chat history")
                start_msg = self.messages[0]
                self.message = [start_msg]
                print("")

            elif input_text.startswith("/history"):
                history = self.inference.tokenize(self.messages, tokenize=False)
                # history = tokenizer.apply_chat_template(self.message, return_tensors="pt", tokenize=False, add_generation_prompt=False)
                print(history)

            elif input_text.startswith("/undo"):
                if len(self.messages) > 2:
                    print("undo latest prompt")
                    self.message = self.messages[:-2]
                else:
                    print("cannot undo because there are not enough self.message on history.")
                print("")

            elif input_text.startswith("/regen"):
                if len(self.messages) >= 2:
                    print("regenerating message (not working)")
                    self.messages = self.messages[:-1]
                    seed = random.randint(0, 2**32 - 1) # Generate a random seed
                    torch_reseed(seed)
                    self.append_generate_chat(None)
                else:
                    print("cannot regenerate because there are not enough self.message on history.")
                print("")

            elif input_text.startswith("/more"):
                self.append_generate_chat(None)

            elif input_text.startswith("/file"):
                filename = input_text[len("/file "):]
                print("read '%s' for prompt:" % filename)
                with open(filename, "r") as f:
                    content = f.read()
                print(content)
                self.append_generate_chat(content)

            elif input_text.startswith("/auto"):
                message_backup = self.messages
                self.messages = [self.roleflip]
                for m in self.message_backup:
                    role = m["role"]
                    content = m["content"]
                    if role == "user":
                        role = "assistant"
                    elif role == "assistant":
                        role = "user"
                    if role != "system":
                        self.message.append({"role": role, "content": content})
                self.append_generate_chat(None) # will automatically advance the conversation as 'user'
                last_message = self.messages[-1]
                last_message["role"] = "user"
                self.messages = message_backup + [last_message]
                self.append_generate_chat(None) # 'regular' chatbot answer

            elif input_text.startswith("/summarize"):
                messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
                messages_temp = [self.summarize] + messages_temp + [self.summarize_user] # copy dict in last instance
                # messages_temp[-1]["role"] = "user"
                input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
                generated_tokens, full_output = self.inference.generate(input_ids)

            elif input_text.startswith("/title"):
                messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
                messages_temp = [self.title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
                messages_temp[-1]["role"] = "user"
                input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
                generated_tokens, full_output = self.inference.generate(input_ids)

            elif input_text.startswith("/save"):
                with open("messages.json", "w") as f:
                    json.dump(self.messages, f, indent=4)

            elif input_text.startswith("/load"):
                with open("messages.json", "r") as f:
                    new_messages = json.load(f)
                    self.messages = [self.messages[0]] + new_messages[1:]

            elif input_text.startswith("/help"):
                print("!<prompt> answer as 'tool' in <tool_response> tags")
                print("/clear clear chat history")
                print("/undo undo latest prompt")
                print("/regen regenerate the last message")
                print("/more generate more additional information")
                print("/file read prompt input from file")
                print("/auto automatically advance conversation")
                print("/summarize generate a summary of the chat")
                print("/title generate a title of the chat")
                print("/save write chat history to file")
                print("/load load previously saved history")
                print("/help print this message")
                print("")

            elif input_text.startswith("/"):
                print("unknown command.")

            else:
                self.append_generate_chat(input_text)

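For orientation, a minimal sketch of how Terminal is wired together (the real setup, including the full system prompts, is initialize_config() in chatbug/llama.py further down):

# Minimal sketch only; see chatbug/llama.py for the actual initialization.
from chatbug.inference import Inference
from chatbug.generation_loop import Terminal, msg
from chatbug import model_selection

inference = Inference(model_selection.get_model())
terminal = Terminal(inference, msg("system", "Hold a casual conversation with the user."))
terminal.roleflip = msg("system", "Keep the conversation going.")  # used by /auto
terminal.join()  # enters the ">>> " input loop with the /commands listed under /help
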
@@ -14,44 +14,52 @@ from transformers.cache_utils import (
)
import torch
import time
import utils
import re
import os
import chatbug.utils as utils
from chatbug.modelconfig import Modelconfig

torch.set_num_threads(os.cpu_count()) # Adjust this to the number of threads/cores you have


class Inference:
    def __init__(self):
        print("loading LLM...")
    def __init__(self, modelconfig: Modelconfig):
        print("loading LLM '%s'..." % modelconfig.model_name)
        t_start = time.time()

        # model_name = "NousResearch/Llama-2-7b-hf" # will cache on C:\Users\ftobler\.cache\huggingface\hub
        model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
        # model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
        # model_name = "unsloth/phi-4-unsloth-bnb-4bit" #too big
        # model_name = "gpt2"
        # model_name = "NousResearch/Hermes-2-Pro-Llama-3-8B"
        # model_name = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
        # "meta-llama/Llama-2-7b-hf" # Replace with your chosen model


        quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4", # Recommended for better performance
            bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
            bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
        )
        # quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
        #     load_in_4bit=True,
        #     bnb_4bit_quant_type="nf4", # Recommended for better performance
        #     bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
        #     bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
        # )

        quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
        # quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)

        # Load the model with quantization (optional)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # device_map="auto", # Automatically places parts of the model on GPU/CPU
            # device_map="cuda", # Automatically places parts of the model on GPU/CPU
            device_map="cuda", # Automatically places parts of the model on GPU/CPU
            # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
            quantization_config=quantization_config_8bit
        )
        if modelconfig.bits_and_bytes_config != None:
            self.model = AutoModelForCausalLM.from_pretrained(
                modelconfig.model_name,
                # device_map="auto", # Automatically places parts of the model on GPU/CPU
                # device_map="cuda", # Automatically places parts of the model on GPU/CPU
                device_map="cuda", # Automatically places parts of the model on GPU/CPU
                # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
                quantization_config=modelconfig.bits_and_bytes_config
            )
        else:
            self.model = AutoModelForCausalLM.from_pretrained(
                modelconfig.model_name,
                device_map="cuda",
            )

        # print("apply optimization")
        # self.model.generation_config.cache_implementation = "static"
@@ -59,25 +67,25 @@ class Inference:

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(modelconfig.model_name)

        print("load took %.3fs" % (time.time() - t_start))

        max_context_length = self.model.config.max_position_embeddings
        self.max_context_length = self.model.config.max_position_embeddings

        self.tokenizer.chat_template = utils.load_json_file("chat_template.json")

        print("max_context_length is %d tokens." % (max_context_length))
        print("max_context_length is %d tokens." % (self.max_context_length))


    def generate(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
    def generate(self, input_ids: torch.Tensor, print_stdout=True) -> tuple[torch.Tensor, str]:
        with torch.inference_mode():
            with torch.no_grad():
                return self.generate_incremental_2(input_ids)
                return self.generate_incremental_2(input_ids, print_stdout)


    def generate_batch(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
    def generate_batch(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
        outputs = self.model.generate(
            input_ids, # **inputs, inputs["input_ids"]
            max_new_tokens=500, # max_length=max_context_length,
@@ -90,11 +98,12 @@
        # skip all input tokens and only output the additional generated part of the conversation
        input_token_count = len(input_ids[0])
        out_text = self.tokenizer.decode(outputs[0][input_token_count:], skip_special_tokens=True)
        print(out_text)
        if print_stdout:
            print(out_text)
        return outputs, out_text


    def generate_incremental_2(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:

    def generate_incremental_2(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
        generated_tokens = input_ids

        past_key_values = DynamicCache()
@@ -126,12 +135,14 @@
            # Decode and print the newly generated token (skip special tokens)
            # out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
            out_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print(out_text, end="", flush=True) # Print without newline
            if print_stdout:
                print(out_text, end="", flush=True) # Print without newline

            # Check if the generated token is the end-of-sequence token
            # if next_token.item() == self.tokenizer.eos_token_id:
            if new_tokens[-1].item() == self.tokenizer.eos_token_id:
                print("")
                if print_stdout:
                    print("")
                break

            # n += 1
@@ -150,12 +161,12 @@
        return generated_tokens, full_output


    def generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
    def generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
        with torch.inference_mode():
            return self._generate_incremental(input_ids)
            return self._generate_incremental(input_ids, print_stdout)


    def _generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
    def _generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
        # Start with the initial input tokens
        generated_tokens = input_ids # Initially, this is just the input tokens

@@ -169,7 +180,7 @@
        while True:
            # Call the model with the current tokens
            outputs = self.model(
                input_ids=generated_tokens,
                input_ids=generated_tokens,
                use_cache=True,
                num_beams = 1
                # past_key_values=past_key_values
@@ -183,11 +194,13 @@

            # Decode and print the newly generated token (skip special tokens)
            out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
            print(out_text, end="", flush=True) # Print without newline
            if print_stdout:
                print(out_text, end="", flush=True) # Print without newline

            # Check if the generated token is the end-of-sequence token
            if next_token.item() == self.tokenizer.eos_token_id:
                print("")
                if print_stdout:
                    print("")
                break

            n += 1

chatbug/inference_profile_experiement.py, new file (76 lines)
@@ -0,0 +1,76 @@
import time
import nvidia_smi
import torch
import gc
from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig


def empty_cuda():
    while True:
        gc.collect()
        torch.cuda.empty_cache()
        time.sleep(0.5)
        vram = nvidia_smi.get_gpu_stats()["memory_used"]
        print("vram: %d MB" % vram)
        if vram < 200:
            return


def profile_ex(model_conf: Modelconfig):
    print("")
    empty_cuda()
    messages = [
        {"role": "system", "content": "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."},
        {"role": "user", "content": "How do astronomers determine the original wavelength of light emitted by a celestial body at rest, which is necessary for measuring its speed using the Doppler effect?"},
    ]

    gpu_stats_before = nvidia_smi.get_gpu_stats()
    inference = Inference(model_conf)

    gpu_stats_loaded = nvidia_smi.get_gpu_stats()
    t_start = time.time()
    input_ids = inference.tokenize(messages, tokenize=True)
    generated_tokens, full_output = inference.generate_batch(input_ids, print_stdout=False)
    t_end = time.time()
    gpu_stats_after = nvidia_smi.get_gpu_stats()

    took = t_end - t_start
    tokens = len(generated_tokens[0])
    tokens_per = tokens / took
    vram_bulk = gpu_stats_loaded["memory_used"] - gpu_stats_before["memory_used"]
    vram_top = gpu_stats_after["memory_used"] - gpu_stats_loaded["memory_used"]
    print("model: %s" % model_conf.model_name)
    print("tokens: %d tk" % tokens)
    print("time: %.3f s" % took)
    print("speed: %.3f tk/s" % tokens_per)
    print("vram_bulk: %d MB" % vram_bulk)
    print("vram_top: %d MB" % vram_top)
    print("context: %d tk" % inference.max_context_length)
    print("")


def profile(model_conf):
    try:
        profile_ex(model_conf)
    except Exception as e:
        print("exception: " + str(e))
        pass


def main():
    profile(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))
    profile(Modelconfig("unsloth/Llama-3.2-1B"))
    profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))
    profile(Modelconfig("unsloth/llama-3-8b-bnb-4bit"))
    # profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True))
    profile(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit"))
    profile(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit"))
    profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))
    profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True))
    profile(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit"))



if __name__ == "__main__":
    main()

chatbug/llama.py, new file (46 lines)
@@ -0,0 +1,46 @@


import datetime
from chatbug.tool_helper import tool_list
from chatbug.tool_functions import register_dummy
from chatbug.inference import Inference
from chatbug.generation_loop import Terminal, msg
from chatbug import model_selection


register_dummy()


def initialize_config(inference: Inference) -> Terminal:

    # systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
    system_prompt = "Hold a casual conversation with the user. Keep responses short at max 5 sentences and on point. Answer using markdown to the user. When providing code examples, avoid comments which provide no additional information. Do not summarize."
    current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
    append_toolcalls = False
    if append_toolcalls:
        systemmessage = msg("system", system_prompt + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list))
    else:
        systemmessage = msg("system", system_prompt + "\n" + current_date_and_time)

    terminal = Terminal(inference, systemmessage)

    # system message for role flip so the model automatically answers for the user
    terminal.roleflip = msg("system", "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye.")

    # system messages and user message to bring the model to summarize the entire conversation
    terminal.summarize = msg("system", "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly.")
    terminal.summarize_user = msg("system", "Can you summarize the conversation?")

    # system message to create a conversation title
    terminal.title_prompt = msg("system", "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity.")
    return terminal


def main_func():
    inference = Inference(model_selection.get_model())
    terminal = initialize_config(inference)
    terminal.join()


if __name__ == "__main__":
    main_func()

chatbug/matheval/__init__.py, new file (3 lines)
@@ -0,0 +1,3 @@
from chatbug.matheval import ast
from chatbug.matheval import interpreter
from chatbug.matheval import lexer

@@ -1,6 +1,5 @@

import math_lexer as lexer
from math_lexer import Token
from chatbug.matheval import lexer
from chatbug.matheval.lexer import Token


class Statement:

@@ -1,10 +1,11 @@
import math_ast as ast



from sympy.parsing.sympy_parser import parse_expr
from sympy.core.numbers import Integer, One, Zero
from sympy import symbols, Eq, solveset, linsolve, nonlinsolve
from sympy.core.symbol import Symbol
from chatbug.matheval import ast


def interpret(statement: ast.Statement) -> str:

chatbug/model_selection.py, new file (95 lines)
@@ -0,0 +1,95 @@

from chatbug.modelconfig import Modelconfig



def get_model() -> Modelconfig:

    # model: NousResearch/Hermes-3-Llama-3.2-3B
    # tokens: 315 tk
    # time: 94.360 s
    # speed: 3.338 tk/s
    # vram_bulk: 3622 MB
    # vram_top: 80 MB
    # context: 131072 tk
    # model = Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True)

    # model: unsloth/Llama-3.2-1B
    # tokens: 589 tk
    # time: 39.348 s
    # speed: 14.969 tk/s
    # vram_bulk: 4708 MB
    # vram_top: 102 MB
    # context: 131072 tk
    # model = Modelconfig("unsloth/Llama-3.2-1B") # note, fast, but talks to itself. basically does not work.

    # model: unsloth/Llama-3.2-3B-Instruct
    # tokens: 285 tk
    # time: 75.363 s
    # speed: 3.782 tk/s
    # vram_bulk: 3512 MB
    # vram_top: 48 MB
    # context: 131072 tk
    # model = Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True)

    # model: unsloth/llama-3-8b-bnb-4bit
    # tokens: 435 tk
    # time: 84.314 s
    # speed: 5.159 tk/s
    # vram_bulk: 5440 MB
    # vram_top: 216 MB
    # context: 8192 tk
    # model = Modelconfig("unsloth/llama-3-8b-bnb-4bit")

    # Model size: 3.21B params
    # vram used: xxxxx MB
    # speed xxxxx t/s
    # working: DOES NOT LOAD
    # model = Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True)

    # model: unsloth/gemma-2-9b-it-bnb-4bit
    # tokens: 154 tk
    # time: 32.727 s
    # speed: 4.706 tk/s
    # vram_bulk: 6156 MB
    # vram_top: 232 MB
    # context: 8192 tk
    # model = Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit")

    # model: unsloth/Qwen2.5-7B-Instruct-bnb-4bit
    # tokens: 120 tk
    # time: 12.248 s
    # speed: 9.798 tk/s
    # vram_bulk: 5382 MB
    # vram_top: 170 MB
    # context: 32768 tk
    model = Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit") # note, this works really good

    # model: unsloth/Qwen2.5-3B-Instruct
    # tokens: 112 tk
    # time: 12.703 s
    # speed: 8.816 tk/s
    # vram_bulk: 2108 MB
    # vram_top: 98 MB
    # context: 32768 tk
    # model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True)

    # model: unsloth/Qwen2.5-3B-Instruct
    # tokens: 118 tk
    # time: 33.748 s
    # speed: 3.497 tk/s
    # vram_bulk: 3310 MB
    # vram_top: 60 MB
    # context: 32768 tk
    # model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True)

    # Model size: 3.87B params
    # vram used: xxxxx MB
    # speed xxxxx t/s
    # error: requires the protobuf library but it was not found in your environment
    # model = Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit")

    return model

chatbug/modelconfig.py, new file (20 lines)
@@ -0,0 +1,20 @@

from transformers import BitsAndBytesConfig
import torch

class Modelconfig:
    def __init__(self, model_name, bits_and_bytes_config=None, load_in_8bit=False, load_in_4bit=False):
        self.model_name = model_name
        if load_in_4bit:
            assert bits_and_bytes_config == None
            self.bits_and_bytes_config = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4", # Recommended for better performance
                bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
                bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
            )
        elif load_in_8bit:
            assert bits_and_bytes_config == None
            self.bits_and_bytes_config = BitsAndBytesConfig(load_in_8bit=True)
        else:
            self.bits_and_bytes_config = bits_and_bytes_config

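In short, Modelconfig only decides which BitsAndBytesConfig (if any) the Inference class later hands to AutoModelForCausalLM.from_pretrained. A sketch of the three constructor paths, using model names that appear elsewhere in this change set:

# Sketch of the three cases handled by Modelconfig.__init__ above.
cfg_4bit = Modelconfig("unsloth/llama-3-8b-bnb-4bit", load_in_4bit=True)          # NF4, double quantization, bfloat16 compute
cfg_8bit = Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True)   # plain BitsAndBytesConfig(load_in_8bit=True)
cfg_none = Modelconfig("unsloth/Qwen2.5-3B-Instruct")                             # bits_and_bytes_config stays None
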
@@ -1,10 +1,8 @@
import random
import datetime
from tool_helper import tool
import math_lexer
import math_ast
import math_interpreter
import utils
from chatbug.tool_helper import tool
import chatbug.matheval as matheval
import chatbug.utils as utils


# @tool
@@ -39,10 +37,10 @@ def math_evaluate(expression: str):
    Args:
        expression: A valid arithmetic expression (e.g., '2 + 3 * 4'). The expression must not contain '='."""
    try:
        tokens = math_lexer.tokenize(expression)
        parser = math_ast.Parser()
        tokens = matheval.lexer.tokenize(expression)
        parser = matheval.ast.Parser()
        ast = parser.parse(tokens)
        return math_interpreter.interpret(ast)
        return matheval.interpreter.interpret(ast)
    except Exception as e:
        utils.print_error("Tool call evaluation failed. - " + str(e))
        return "Tool call evaluation failed."
@@ -58,10 +56,10 @@ Args:
        expression = "solve " + " and ".join(equations) + " for " + " and ".join(variables)
        print(expression)

        tokens = math_lexer.tokenize(expression)
        parser = math_ast.Parser()
        tokens = matheval.lexer.tokenize(expression)
        parser = ast.Parser()
        ast = parser.parse(tokens)
        return math_interpreter.interpret(ast)
        return matheval.interpreter.interpret(ast)
    except Exception as e:
        utils.print_error("Tool call evaluation failed. - " + str(e))
        return "Tool call evaluation failed."
@@ -2,7 +2,7 @@
from typing import Callable, List, Optional
import json
import re
import utils
import chatbug.utils as utils

tool_list = []

chatbug/ui/__init__.py, new file (0 lines)

chatbug/ui/__main__.py, new file (20 lines)
@@ -0,0 +1,20 @@


from .server import start_server
from .serverwait import wait_for_server
from .ui import start_ui, _start_sandboxed


def start_ui():
    svr = start_server(start_thread=False)
    url = f"http://localhost:{svr.port}"
    # wait_for_server(url)
    # # start_ui(threaded=False)
    # import webview
    # w = webview.create_window('asdf', '../../web/index.html', min_size=(1200, 900), zoomable=True)
    # webview.start(ssl=True)

if __name__ == "__main__":
    start_ui()

chatbug/ui/bottle.py, new file (3771 lines): file diff suppressed because it is too large

chatbug/ui/bottle_svr.py, new file (50 lines)
@@ -0,0 +1,50 @@
#tornado needs this or it does not run
import asyncio
try:
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
except AttributeError:
    print("Probably running on linux")

from bottle import route, run, response, static_file, request, post
from .file_watchdog import FileWatchdog


class BottleServer:

    def __init__(self, listen="0.0.0.0", port=8080, start_thread=True, root="web"):

        self.root = root

        self.port = port
        self.listen = listen
        self.wdt = FileWatchdog(self.root)

        if start_thread:
            import threading
            self.thread = threading.Thread(target=self._run, args=())
            self.thread.name = "BottleServerThread"
            self.thread.daemon = True
            self.thread.start()
        else:
            self._run()

    def _home(self):
        return static_file("index.html", root= self.root)

    def _watchdog(self):
        return str(self.wdt.time)

    def _files(self, name):
        if name.endswith(".vue"):
            return static_file(name, root= self.root, mimetype="text/html")
        return static_file(name, root= self.root)

    def _run(self):

        route('/')(self._home)
        route('/watchdog')(self._watchdog)
        route('/<name:path>')(self._files)

        print(f"Starting server at {self.listen}:{self.port}")
        run(host=self.listen, port=self.port, debug=False, threaded=True, quiet=True)

chatbug/ui/file_watchdog.py, new file (47 lines)
@@ -0,0 +1,47 @@

import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler


class MyHandler(FileSystemEventHandler):
    def __init__(self, function):
        self.function = function

    def on_any_event(self, _event):
        # Handle the event (e.g., file created, modified, deleted)
        self.function()


class FileWatchdog:
    def __init__(self, path):
        self.path = path
        self.time = 0

        event_handler = MyHandler(lambda: self.event_handler())

        self.observer = Observer()
        self.observer.schedule(event_handler, path, recursive=True)
        self.observer.start()

    def event_handler(self):
        #print("change detected")
        self.time = time.time()

    def stop(self):
        self.observer.stop()




if __name__ == "__main__":
    wdt = FileWatchdog("./web")

    try:
        while True:
            time.sleep(1)
            print(wdt.time)
    except KeyboardInterrupt:
        wdt.stop()

chatbug/ui/server.py, new file (10 lines)
@@ -0,0 +1,10 @@
from .bottle_svr import BottleServer


def start_server(start_thread=False):
    print("server start")
    return BottleServer(start_thread=start_thread, root="web")


if __name__ == "__main__":
    start_server()

chatbug/ui/serverwait.py, new file (29 lines)
@@ -0,0 +1,29 @@
import time
import requests
import socket



def wait_for_server(url, timeout=10, retry_interval=0.5):
    """
    Waits for a web server to become available by polling its URL.
    """

    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        try:
            # First, try a simple TCP connection to check if the port is open
            hostname, port = url.split("//")[1].split(":")
            port = int(port)
            with socket.create_connection((hostname, port), timeout=retry_interval):
                pass # If the connection succeeds, continue to the HTTP check

            # Then, make an HTTP request to ensure the server is responding correctly
            response = requests.get(url, timeout=retry_interval)
            response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
            return # Server is up and responding correctly
        except (requests.exceptions.RequestException, socket.error) as e:
            print(f"Server not yet available: {e}. Retrying in {retry_interval} seconds...")
            time.sleep(retry_interval)

    raise TimeoutError(f"Server at {url} did not become available within {timeout} seconds.")

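A small usage sketch of how this is meant to pair with the Bottle server. The commented-out wait_for_server(url) call in chatbug/ui/__main__.py suggests this flow; treat the snippet as an assumption about intended usage, not wired-up behavior:

# Sketch: start the server in a background thread, then block until it responds.
from chatbug.ui.server import start_server
from chatbug.ui.serverwait import wait_for_server

svr = start_server(start_thread=True)
wait_for_server(f"http://localhost:{svr.port}", timeout=10)
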
chatbug/ui/ui.py, new file (30 lines)
@@ -0,0 +1,30 @@


import webview
from threading import Thread


def start_ui(threaded=False):
    if threaded:
        _start_threaded()
    else:
        _start_normal()


def _start_threaded():
    t = Thread(target=start_ui, args=[False])
    t.run()

def _start_normal():
    webview.create_window('Geargenerator', 'http://localhost:8080', min_size=(1200, 900), zoomable=True)
    webview.start()

def _start_sandboxed():
    webview.create_window('Geargenerator', 'web_v2/geargenerator.html', min_size=(1200, 900), zoomable=True)
    webview.start(ssl=True)



if __name__ == "__main__":
    _start_sandboxed()
    # start_ui(threaded=False)

llama.py, deleted file (175 lines)
@@ -1,175 +0,0 @@
import time
import random
from tool_helper import tool_list, parse_and_execute_tool_call
from tool_functions import register_dummy
from inference import Inference, torch_reseed
import datetime



messages = []
inference = None

# systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
systemmessage = "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."

# system message for role flip so the model automatically answers for the user
roleflip = {"role": "system", "content": "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye."}

# system messages and user message to bring the model to summarize the entire conversation
summarize = {"role": "system", "content": "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly."}
summarize_user = {"role": "system", "content": "Can you summarize the conversation?"}

# system message to create a conversation title
title_prompt = {"role": "system", "content": "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity."}



register_dummy()




def append_generate_chat(input_text: str, role="user"):
    t_start = time.time()

    # generate AI response
    if input_text != None:
        messages.append({"role": role, "content": input_text})

    inputs = inference.tokenize(messages, tokenize=True)

    outputs, out_text = inference.generate(inputs)

    # append result to message history
    messages.append({"role": "assistant", "content": out_text})

    print("")
    print("generation took %.3fs (%d tokens)" % (time.time() - t_start, len(outputs[0])))

    # handle tool call and check if a tool call has happened.
    tool_result = parse_and_execute_tool_call(out_text, tool_list)
    if tool_result != None:
        # tool call happened
        tool_result = "<tool_response>%s</tool_response>" % tool_result
        # depending on the chat template the tool response tags must or must not be passed. :(
        append_generate_chat(tool_result, role="tool")




def main():
    global messages
    global inference

    inference = Inference()

    current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
    messages = [{"role": "system", "content": systemmessage + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list)}]

    while True:
        # print an input prompt to receive text or commands
        input_text = input(">>> ")
        print("")


        if input_text.startswith("!"):
            append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
            # append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(

        elif input_text.startswith("/clear"):
            print("clearing chat history")
            start_msg = messages[0]
            messages = [start_msg]
            print("")

        elif input_text.startswith("/history"):
            history = inference.tokenize(messages, tokenize=False)
            # history = tokenizer.apply_chat_template(messages, return_tensors="pt", tokenize=False, add_generation_prompt=False)
            print(history)

        elif input_text.startswith("/undo"):
            if len(messages) > 2:
                print("undo latest prompt")
                messages = messages[:-2]
            else:
                print("cannot undo because there are not enough messages on history.")
            print("")

        elif input_text.startswith("/regen"):
            if len(messages) >= 2:
                print("regenerating message (not working)")
                messages = messages[:-1]
                seed = random.randint(0, 2**32 - 1) # Generate a random seed
                torch_reseed(seed)
                append_generate_chat(None)
            else:
                print("cannot regenerate because there are not enough messages on history.")
            print("")

        elif input_text.startswith("/more"):
            append_generate_chat(None)

        elif input_text.startswith("/file"):
            filename = input_text[len("/file "):]
            print("read '%s' for prompt:" % filename)
            with open(filename, "r") as f:
                content = f.read()
            print(content)
            append_generate_chat(content)

        elif input_text.startswith("/auto"):
            messages_backup = messages
            messages = [roleflip]
            for m in messages_backup:
                role = m["role"]
                content = m["content"]
                if role == "user":
                    role = "assistant"
                elif role == "assistant":
                    role = "user"
                if role != "system":
                    messages.append({"role": role, "content": content})
            append_generate_chat(None) # will automatically advance the conversation as 'user'
            last_message = messages[-1]
            last_message["role"] = "user"
            messages = messages_backup + [last_message]
            append_generate_chat(None) # 'regular' chatbot answer

        elif input_text.startswith("/summarize"):
            messages_temp = list(filter(lambda x: x["role"] != "system", messages))
            messages_temp = [summarize] + messages_temp + [summarize_user] # copy dict in last instance
            # messages_temp[-1]["role"] = "user"
            input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
            generated_tokens, full_output = inference.generate(input_ids)

        elif input_text.startswith("/title"):
            messages_temp = list(filter(lambda x: x["role"] != "system", messages))
            messages_temp = [title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
            messages_temp[-1]["role"] = "user"
            input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
            generated_tokens, full_output = inference.generate(input_ids)

        elif input_text.startswith("/help"):
            print("!<prompt> answer as 'tool' in <tool_response> tags")
            print("/clear clear chat history")
            print("/undo undo latest prompt")
            print("/regen regenerate the last message")
            print("/more generate more additional information")
            print("/file read prompt input from file")
            print("/auto automatically advance conversation")
            print("/summarize generate a summary of the chat")
            print("/title generate a title of the chat")
            print("/help print this message")
            print("")

        elif input_text.startswith("/"):
            print("unknown command.")

        else:
            append_generate_chat(input_text)




if __name__ == "__main__":
    main()

@@ -1,3 +1,4 @@
transformers
accelerate
bitsandbytes
bitsandbytes
pytest

setup.py, new file (28 lines)
@@ -0,0 +1,28 @@
from setuptools import setup, find_packages

setup(
    name='chatbug',
    version='0.1.0',
    description='A conversational AI chatbot',
    author='Florin Tobler',
    author_email='florin.tobler@hotmail.com',
    packages=find_packages(exclude=["tests"]),
    install_requires=[
        'transformers',
        'accelerate',
        'bitsandbytes',
        'pytest',
        'pywebview',
    ],
    entry_points={
        'console_scripts': [
            'chatbug=chatbug.llama:main_func',
            # a^      b^      c^    d^
            # a => the command line argument
            # b => the package name
            # c => the file name in the package (same as imports)
            # d => the function to call
            'chatbugui=chatbug.ui.__main__:start_ui',
        ],
    },
)

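The two console_scripts entries mean that, once the package is installed, the chatbug command resolves to chatbug.llama:main_func and chatbugui to chatbug.ui.__main__:start_ui. Running the chatbug entry point is equivalent to this short snippet:

# Equivalent of the installed `chatbug` console script.
from chatbug.llama import main_func

main_func()
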
@@ -1 +0,0 @@
# empty

@@ -1,32 +1,20 @@
import pytest
import tests.helper as helper
from tests import helper


inference = None
InferenceClass = None
Tensor = None


def prepare():
    if InferenceClass == None:
        test_import_inference_module_librarys()
    if inference == None:
        test_instantiate_inference_instance()


def test_import_inference_module_librarys():
    import inference
    import torch
    global InferenceClass
    global Tensor
    InferenceClass = inference.Inference
    Tensor = torch.Tensor


def test_instantiate_inference_instance():
    if InferenceClass == None:
        test_import_inference_module_librarys()
    global inference
    inference = InferenceClass()
    global Tensor
    if inference == None:
        from torch import Tensor as _Tensor
        from chatbug.inference import Inference
        from chatbug.model_selection import get_model
        inference = Inference(get_model())
        Tensor = _Tensor


def test_tool_header_generation():

@@ -1,6 +1,6 @@
import pytest
import tool_helper
import tests.helper as helper
import chatbug.tool_helper as tool_helper
from tests import helper



@@ -1,6 +1,6 @@
import pytest
import tool_functions

import chatbug.tool_functions as tool_functions
from tests import helper


def test_math_evaluate_1():
@@ -28,6 +28,13 @@ def test_math_evaluate_5():
    result = tool_functions.math_evaluate("sin(pi/2) + cos(0)")
    assert result == "sin(pi/2) + cos(0) = 2"

def test_math_evaluate_solve_a():
    result = tool_functions.math_evaluate("solve 240=x*r+x*r^2+x*r^3+s and r=1.618 and s=5 for x, r, s")
    assert result == "Solved equation system 240 = r**3*x + r**2*x + r*x + s, r = 1.61800000000000 and s = 5 for x=27.7393327937747=~27.739, r=1.61800000000000=~1.618 and s=5.00000000000000=~5.000."

def test_math_evaluate_solve_b():
    result = tool_functions.math_evaluate("solve 250=x+x*r+s and r=1.618 and s=0 for x, r, s")
    assert result == "Solved equation system 250 = r*x + s + x, r = 1.61800000000000 and s = 0 for x=95.4927425515661=~95.493, r=1.61800000000000=~1.618 and s=0."


@@ -54,4 +61,3 @@ def test_math_solver_3b():
def test_math_solver_4():
    result = tool_functions.math_evaluate("solve 2*x**3 + 3*y = 7 and x - y = 1 for x, y")
    assert result == "Solved equation system 2*x**3 + 3*y = 7 and x - y = 1 for x=~1.421 and y=~0.421."

@@ -1,7 +1,8 @@
import pytest
import tool_helper
from chatbug import tool_helper
from unittest import mock
import tests.helper as helper
from tests import helper
import re


@@ -40,34 +41,34 @@ def test_match_and_extract_matching3_with_newline():


def test_string_malformed_faulty():
    with mock.patch("utils.print_error") as print_error_mock:
    with mock.patch("chatbug.utils.print_error") as print_error_mock:
        result = tool_helper._execute_tool_call_str("{json_content}", [])
        assert result == None
        print_error_mock.assert_called_once() # this will check if the mocked function on the context was called.


def test_tool_call_json_1():
    with mock.patch("utils.print_error") as print_error_mock:
    with mock.patch("chatbug.utils.print_error") as print_error_mock:
        result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": 1, "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
        assert result == "result_1_zwei"
        assert print_error_mock.call_count == 0


def test_tool_call_json_2():
    with mock.patch("utils.print_error") as print_error_mock:
    with mock.patch("chatbug.utils.print_error") as print_error_mock:
        result = tool_helper._execute_tool_call_json({"name": "tool_dummy2", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
        assert result == "SOME_TEXT"
        assert print_error_mock.call_count == 0


def test_tool_call_json_non_existing_call_check():
    with mock.patch("utils.print_error") as print_error_mock:
    with mock.patch("chatbug.utils.print_error") as print_error_mock:
        result = tool_helper._execute_tool_call_json({"name": "tool_dummy_which_is_not_existing", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
        assert result == None
        assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.

def test_tool_call_json_wrong_arguments_check():
    with mock.patch("utils.print_error") as print_error_mock:
    with mock.patch("chatbug.utils.print_error") as print_error_mock:
        result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": "must_be_an_int_but_is_string", "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
        assert result == None
        assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.
@@ -75,7 +76,6 @@ def test_tool_call_json_wrong_arguments_check():


def test_regex_multiline():
    import re
    pattern = r"<start>(.*)</end>"

    # The text to search (spanning multiple lines)

web/index.html, new file (61 lines)
@@ -0,0 +1,61 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script> -->
    <link rel="stylesheet" href="stylesheet.css">
    <script src="alpine.min.js"></script>
    <script src="main.js"></script>
    <script src="watchdog.js"></script>
</head>
<body>
    <div class="sidebar">
        <h1>Chatbug 🪲</h1>
        <div class="button">🐛 New Chat</div>
        <div class="title">Today</div>
        <div class="button">Building Web UI with Bottle & Alpine.js</div>
        <div class="button">Coding in python</div>
        <div class="title">Last Week</div>
        <div class="title">Older</div>
    </div>
    <div class="mainarea">
        <!-- <h1 x-data="{ message: 'I ❤️ Alpine' }" x-text="message"></h1> -->

        <div class="message">
            <div class="bubble">Hello world</div>
        </div>
        <div class="response">
            <div class="">Hello! Nice to meet you. What's up?</div>
        </div>
        <div class="message">
            <div class="bubble">ah, just holding an example conversation with you</div>
        </div>
        <div class="response">
            <div class="">Got it! Fun stuff. What kind of projects are you working on these days?</div>
        </div>
        <div class="message">
            <div class="bubble">LLM chatbot named chatbug 🪲</div>
        </div>
        <div class="response">
            <div class="">Cool name! Chatbug sounds like a friendly one. How's it going?</div>
        </div>
        <div class="message">
            <div class="bubble">making a web ui with bottle and alpinejs</div>
        </div>

        <div class="input">
            <!-- toolbutton for tool submenu, normally hidden unless pressed -->
            <div class="button">+</div>
            <div class="tool list" style="display:none">
                <div class="tool button">attach file</div>
                <div class="tool button">regenerate</div>
                <div class="tool button">undo</div>
            </div>
            <input type="text">
            <!-- send -->
            <div class="button">↗</div>
        </div>
    </div>
</body>
</html>

web/main.js, new file (25 lines)
@@ -0,0 +1,25 @@
// import {createApp, ref, reactive} from 'vue';




// const app = createApp({
//     data() {

//         let msg = ref("hello world")

//         try {
//             msg.value = "" + pywebview.api
//         } catch (e) {
//             msg.value = "did not invoke " + e
//         }

//         window.msg = msg
//         return {
//             message: msg
//         };
//     }
// });
// app.mount('#app');

web/stylesheet.css, new file (117 lines)
@@ -0,0 +1,117 @@
body {
    background-color: black;
    color: white;
    font-family: Arial, Helvetica, sans-serif;
    margin: 0px;
    height: 100%;
}


.sidebar {
    width: 250px;
    background-color: #2a262a;
    float: left;
    height: 100%;
    position: absolute;
}

.sidebar h1 {
    margin: 20px;
}


.sidebar .title {
    font-size: 8pt;
    margin: 20px;
    margin-top: 30px;
    margin-bottom: 10px;
}
.sidebar .button {
    margin-left: 10px;
    margin-right: 10px;
    padding: 10px;
    border-radius: 10px;
}
.sidebar .button:hover {
    background-color: #423a42;
}

.mainarea {
    margin-left: 260px;
    height: 100%;
    position: absolute;
    right: 0;
    left: 0;
}

.message {
    display: flex;
    margin-left: 40px;
    margin-right: 10px;

}

.bubble {
    padding: 10px;
    border-radius: 10px;
    background-color: #416146;
    margin-left: auto;
    float: right;
    position: relative;
}

.response {
    display: flex;
    margin: 30px;
    position: relative;
}

.response::before {
    content: '🪲';
    position: absolute;
    top: -4px;
    left: -30px;
}


.input {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 10px;
    background-color: #2a262a;
    border-radius: 10px;
    width: 70%;
    margin: auto;
    position: absolute;
    bottom: 40px;
}

.tool.list {
    display: none;
    background-color: #fff;
    border: 1px solid #ccc;
    position: absolute;
    top: 100%;
    left: 0;
    z-index: 1;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

.tool.button {
    cursor: pointer;
    padding: 5px 10px;
    margin: 5px;
}

.input input {
    flex-grow: 1;
    padding: 10px;
    border: 0px solid #ccc;
    background: none;
    color: white;
}
.input input:focus {
    outline: 0px solid black; /* Custom focus outline */

}

web/watchdog.js, new file (67 lines)
@@ -0,0 +1,67 @@


wdt = {
    last_wdt_time: 0,
    watchdog_counter: 0
}

pollFileChange = () => {
    setTimeout(() => {
        wdt.watchdog_counter++
        console.log(wdt.watchdog_counter)
        if (wdt.watchdog_counter > 20) {
            return
        }
        ajax({
            type: "GET",
            url: "/watchdog",
            success: (data) => {
                var time = Number(data)
                if (wdt.last_wdt_time == 0) {
                    wdt.last_wdt_time = time
                    pollFileChange()
                } else if (time > wdt.last_wdt_time) {
                    location.reload();
                } else {
                    pollFileChange()
                }
            },
        })
    }, 10000)
}

function ajax(setting) {
    if (typeof(shutdown) !== 'undefined') return
    var request = new XMLHttpRequest();
    request.open(setting.type, setting.url, true);
    request.setRequestHeader('Content-Type', setting.dataType)
    request.onload = function(data) {
        if (typeof(shutdown) !== 'undefined') return
        if (this.status >= 200 && this.status < 400) {
            if (setting.success) {
                setting.success(this.response)
            }
        } else {
            if (setting.error) {
                setting.error(this.response)
            }
        }
    }
    request.onerror = function(data) {
        if (typeof(shutdown) !== 'undefined') return
        if (setting.error) {
            setting.error(data)
        }
    }
    if (setting.data) {
        request.send(setting.data)
    } else {
        request.send()
    }
}



pollFileChange()
