Compare commits

...

12 Commits

SHA1 Message Date
5e3747179f UI prototype 2025-01-15 23:39:33 +01:00
44e5bd423e start cases 2025-01-15 23:39:09 +01:00
03c93f4d8b force encoding 2025-01-15 23:38:50 +01:00
f9c4d3e2db add webserver 2025-01-15 23:38:39 +01:00
7224111a0b python package restructuring 2025-01-14 20:29:29 +01:00
0c022d4731 tuned prompt 2025-01-13 23:33:51 +01:00
a697f49698 whitespace 2025-01-13 23:33:22 +01:00
3218e7eb63 whitespace 2025-01-13 23:32:54 +01:00
ef789375c8 improved append file 2025-01-13 23:32:42 +01:00
7f0cb49156 refactoring 2025-01-13 22:39:32 +01:00
19870cdea8 try out some more models 2025-01-13 20:47:48 +01:00
677eb6d0ea switch for toolcalls 2025-01-12 21:16:42 +01:00
41 changed files with 4847 additions and 267 deletions

3
.gitignore vendored

@@ -1,4 +1,5 @@
 /model/*
 *.prof
 __pycache__
 *.venv
+*.egg-info

2
.vscode/launch.json vendored

@@ -15,7 +15,7 @@
"name": "PyDebug: __main__.py", "name": "PyDebug: __main__.py",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "__main__.py", "program": "chatbug/__main__.py",
"console": "integratedTerminal" "console": "integratedTerminal"
} }
] ]

0
chatbug/__init__.py Normal file

chatbug/__main__.py

@@ -1,6 +1,7 @@
print("running __main__.-py") print("running __main__.-py")
from llama import main from chatbug.llama import main_func
if __name__ == "__main__": if __name__ == "__main__":
main() main_func()

37
chatbug/download_model.py Normal file

@@ -0,0 +1,37 @@
from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig
def main():
# Model size: 3.21B params
Inference(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))
# Model size: 1.24B params
Inference(Modelconfig("unsloth/Llama-3.2-1B", load_in_8bit=True))
# Model size: 3.21B params
Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))
# Model size: 4.65B params
Inference(Modelconfig("unsloth/llama-3-8b-bnb-4bit", load_in_4bit=True))
# Model size: 3.21B params
Inference(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_4bit=True))
# Model size: 5.21B params
Inference(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit", load_in_4bit=True))
# Model size: 4.46B params
Inference(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit", load_in_4bit=True))
# Model size: 3.09B params
Inference(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))
# Model size: 3.87B params
Inference(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit", load_in_4bit=True))
if __name__ == "__main__":
main()

46
chatbug/file_append.py Normal file

@@ -0,0 +1,46 @@
import os
def check_append_file(prompt: str) -> str:
if "@" in prompt:
parts = prompt.split(" ")
content = []
for part in parts:
if part.startswith("@"):
filename = part[1:]
try:
if os.path.exists(filename):
with open(filename, "r", encoding="utf-8") as f:
content.append("%s:'''\n%s'''" % (filename, f.read()))
except FileNotFoundError:
print(f"File '{filename}' not found.")
except Exception as e:
print("exception encountered %s", e)
content.append(prompt)
return "\n".join(content)
return prompt
if __name__ == "__main__":
exit() # not accidentally trigger it
# Create some sample files
with open("fmain.py", "w") as f:
f.write("# This is main.py\n")
with open("finference.py", "w") as f:
f.write("# This is inference.py\n")
# Test cases
test_prompts = [
"@fmain.py",
"@fmain.py @finference.py",
"@fnonexistent.py",
"@fmain.py @fnonexistent.py"
]
for prompt in test_prompts:
print(f"Testing prompt: {prompt}")
result = check_append_file(prompt)
print(f"Result: {result}")
print("-" * 20)

170
chatbug/generation_loop.py Normal file

@@ -0,0 +1,170 @@
import time
import json
import random
from chatbug.tool_helper import tool_list, parse_and_execute_tool_call
from chatbug.inference import Inference, torch_reseed
from chatbug.file_append import check_append_file
def msg(role: str, content: str) -> dict:
return {"role": role, "content": content}
class Terminal:
def __init__(self, inference: Inference, systemmessage: dict):
self.inference = inference
self.messages:list[dict] = [systemmessage]
# these are meant to be overwritten by better ones
self.roleflip = msg("system", "keep going.")
self.summarize = msg("system", "summarize conversation")
self.summarize_user = msg("system", "please summarize conversation")
self.title_prompt = msg("system", "create a title for this conversation")
def append_generate_chat(self, input_text: str, role="user"):
t_start = time.time()
# generate AI response
if input_text != None:
self.messages.append({"role": role, "content": input_text})
inputs = self.inference.tokenize(self.messages, tokenize=True)
number_of_input_tokens = inputs.shape[1]
outputs, out_text = self.inference.generate(inputs)
# append result to message history
self.messages.append({"role": "assistant", "content": out_text})
print("")
time_taken = time.time() - t_start
number_of_tokens = len(outputs[0])
tokens_per_second = (number_of_tokens - number_of_input_tokens) / time_taken
print("generation took %.3fs (%d tokens, %.3f t/s)" % (time_taken, number_of_tokens, tokens_per_second))
# handle tool call and check if a tool call has happened.
tool_result = parse_and_execute_tool_call(out_text, tool_list)
if tool_result != None:
# tool call happened
tool_result = "<tool_response>%s</tool_response>" % tool_result
# depending on the chat template the tool response tags must or must not be passed. :(
self.append_generate_chat(tool_result, role="tool")
def join(self):
while True:
# print an input prompt to receive text or commands
input_text = input(">>> ")
print("")
input_text = check_append_file(input_text)
if input_text.startswith("!"):
self.append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
# append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(
elif input_text.startswith("/clear"):
print("clearing chat history")
start_msg = self.messages[0]
self.message = [start_msg]
print("")
elif input_text.startswith("/history"):
history = self.inference.tokenize(self.messages, tokenize=False)
# history = tokenizer.apply_chat_template(self.message, return_tensors="pt", tokenize=False, add_generation_prompt=False)
print(history)
elif input_text.startswith("/undo"):
if len(self.messages) > 2:
print("undo latest prompt")
self.message = self.messages[:-2]
else:
print("cannot undo because there are not enough self.message on history.")
print("")
elif input_text.startswith("/regen"):
if len(self.messages) >= 2:
print("regenerating message (not working)")
self.messages = self.messages[:-1]
seed = random.randint(0, 2**32 - 1) # Generate a random seed
torch_reseed(seed)
self.append_generate_chat(None)
else:
print("cannot regenerate because there are not enough self.message on history.")
print("")
elif input_text.startswith("/more"):
self.append_generate_chat(None)
elif input_text.startswith("/file"):
filename = input_text[len("/file "):]
print("read '%s' for prompt:" % filename)
with open(filename, "r") as f:
content = f.read()
print(content)
self.append_generate_chat(content)
elif input_text.startswith("/auto"):
message_backup = self.messages
self.messages = [self.roleflip]
for m in self.message_backup:
role = m["role"]
content = m["content"]
if role == "user":
role = "assistant"
elif role == "assistant":
role = "user"
if role != "system":
self.message.append({"role": role, "content": content})
self.append_generate_chat(None) # will automatically advance the conversation as 'user'
last_message = self.messages[-1]
last_message["role"] = "user"
self.messages = message_backup + [last_message]
self.append_generate_chat(None) # 'regular' chatbot answer
elif input_text.startswith("/summarize"):
messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
messages_temp = [self.summarize] + messages_temp + [self.summarize_user] # copy dict in last instance
# messages_temp[-1]["role"] = "user"
input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
generated_tokens, full_output = self.inference.generate(input_ids)
elif input_text.startswith("/title"):
messages_temp = list(filter(lambda x: x["role"] != "system", self.messages))
messages_temp = [self.title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
messages_temp[-1]["role"] = "user"
input_ids = self.inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
generated_tokens, full_output = self.inference.generate(input_ids)
elif input_text.startswith("/save"):
with open("messages.json", "w") as f:
json.dump(self.messages, f, indent=4)
elif input_text.startswith("/load"):
with open("messages.json", "r") as f:
new_messages = json.load(f)
self.messages = [self.messages[0]] + new_messages[1:]
elif input_text.startswith("/help"):
print("!<prompt> answer as 'tool' in <tool_response> tags")
print("/clear clear chat history")
print("/undo undo latest prompt")
print("/regen regenerate the last message")
print("/more generate more additional information")
print("/file read prompt input from file")
print("/auto automatically advance conversation")
print("/summarize generate a summary of the chat")
print("/title generate a title of the chat")
print("/save write chat history to file")
print("/load load previously saved history")
print("/help print this message")
print("")
elif input_text.startswith("/"):
print("unknown command.")
else:
self.append_generate_chat(input_text)

chatbug/inference.py

@@ -14,44 +14,52 @@ from transformers.cache_utils import (
 )
 import torch
 import time
-import utils
 import re
 import os
+import chatbug.utils as utils
+from chatbug.modelconfig import Modelconfig
 torch.set_num_threads(os.cpu_count()) # Adjust this to the number of threads/cores you have
 class Inference:
-    def __init__(self):
-        print("loading LLM...")
+    def __init__(self, modelconfig: Modelconfig):
+        print("loading LLM '%s'..." % modelconfig.model_name)
         t_start = time.time()
         # model_name = "NousResearch/Llama-2-7b-hf" # will cache on C:\Users\ftobler\.cache\huggingface\hub
-        model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
+        # model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # will cache on C:\Users\ftobler\.cache\huggingface\hub
+        # model_name = "unsloth/phi-4-unsloth-bnb-4bit" #too big
         # model_name = "gpt2"
         # model_name = "NousResearch/Hermes-2-Pro-Llama-3-8B"
         # model_name = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
         # "meta-llama/Llama-2-7b-hf" # Replace with your chosen model
-        quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
-            load_in_4bit=True,
-            bnb_4bit_quant_type="nf4", # Recommended for better performance
-            bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
-            bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
-        )
-        quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
+        # quantization_config_4bit = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
+        #     load_in_4bit=True,
+        #     bnb_4bit_quant_type="nf4", # Recommended for better performance
+        #     bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
+        #     bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
+        # )
+        # quantization_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
         # Load the model with quantization (optional)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            # device_map="auto", # Automatically places parts of the model on GPU/CPU
-            # device_map="cuda", # Automatically places parts of the model on GPU/CPU
-            device_map="cuda", # Automatically places parts of the model on GPU/CPU
-            # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
-            quantization_config=quantization_config_8bit
-        )
+        if modelconfig.bits_and_bytes_config != None:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                modelconfig.model_name,
+                # device_map="auto", # Automatically places parts of the model on GPU/CPU
+                # device_map="cuda", # Automatically places parts of the model on GPU/CPU
+                device_map="cuda", # Automatically places parts of the model on GPU/CPU
+                # load_in_8bit=True, # Enables 8-bit quantization if bitsandbytes is installed
+                quantization_config=modelconfig.bits_and_bytes_config
+            )
+        else:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                modelconfig.model_name,
+                device_map="cuda",
+            )
         # print("apply optimization")
         # self.model.generation_config.cache_implementation = "static"
@@ -59,25 +67,25 @@ class Inference:
         # Load tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(modelconfig.model_name)
         print("load took %.3fs" % (time.time() - t_start))
-        max_context_length = self.model.config.max_position_embeddings
+        self.max_context_length = self.model.config.max_position_embeddings
         self.tokenizer.chat_template = utils.load_json_file("chat_template.json")
-        print("max_context_length is %d tokens." % (max_context_length))
+        print("max_context_length is %d tokens." % (self.max_context_length))
-    def generate(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate(self, input_ids: torch.Tensor, print_stdout=True) -> tuple[torch.Tensor, str]:
         with torch.inference_mode():
             with torch.no_grad():
-                return self.generate_incremental_2(input_ids)
+                return self.generate_incremental_2(input_ids, print_stdout)
-    def generate_batch(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_batch(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         outputs = self.model.generate(
             input_ids, # **inputs, inputs["input_ids"]
             max_new_tokens=500, # max_length=max_context_length,
@@ -90,11 +98,12 @@ class Inference:
         # skip all input tokens and only output the additional generated part of the conversation
         input_token_count = len(input_ids[0])
         out_text = self.tokenizer.decode(outputs[0][input_token_count:], skip_special_tokens=True)
-        print(out_text)
+        if print_stdout:
+            print(out_text)
         return outputs, out_text
-    def generate_incremental_2(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_incremental_2(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         generated_tokens = input_ids
         past_key_values = DynamicCache()
@@ -126,12 +135,14 @@ class Inference:
             # Decode and print the newly generated token (skip special tokens)
             # out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
             out_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
-            print(out_text, end="", flush=True) # Print without newline
+            if print_stdout:
+                print(out_text, end="", flush=True) # Print without newline
             # Check if the generated token is the end-of-sequence token
             # if next_token.item() == self.tokenizer.eos_token_id:
             if new_tokens[-1].item() == self.tokenizer.eos_token_id:
-                print("")
+                if print_stdout:
+                    print("")
                 break
             # n += 1
@@ -150,12 +161,12 @@ class Inference:
         return generated_tokens, full_output
-    def generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         with torch.inference_mode():
-            return self._generate_incremental(input_ids)
+            return self._generate_incremental(input_ids, print_stdout)
-    def _generate_incremental(self, input_ids: torch.Tensor) -> tuple[torch.Tensor, str]:
+    def _generate_incremental(self, input_ids: torch.Tensor, print_stdout:bool=True) -> tuple[torch.Tensor, str]:
         # Start with the initial input tokens
         generated_tokens = input_ids # Initially, this is just the input tokens
@@ -169,7 +180,7 @@ class Inference:
         while True:
             # Call the model with the current tokens
             outputs = self.model(
                 input_ids=generated_tokens,
                 use_cache=True,
                 num_beams = 1
                 # past_key_values=past_key_values
@@ -183,11 +194,13 @@ class Inference:
             # Decode and print the newly generated token (skip special tokens)
             out_text = self.tokenizer.decode(next_token, skip_special_tokens=True)
-            print(out_text, end="", flush=True) # Print without newline
+            if print_stdout:
+                print(out_text, end="", flush=True) # Print without newline
             # Check if the generated token is the end-of-sequence token
             if next_token.item() == self.tokenizer.eos_token_id:
-                print("")
+                if print_stdout:
+                    print("")
                 break
             n += 1


@@ -0,0 +1,76 @@
import time
import nvidia_smi
import torch
import gc
from chatbug.inference import Inference
from chatbug.modelconfig import Modelconfig
def empty_cuda():
while True:
gc.collect()
torch.cuda.empty_cache()
time.sleep(0.5)
vram = nvidia_smi.get_gpu_stats()["memory_used"]
print("vram: %d MB" % vram)
if vram < 200:
return
def profile_ex(model_conf: Modelconfig):
print("")
empty_cuda()
messages = [
{"role": "system", "content": "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."},
{"role": "user", "content": "How do astronomers determine the original wavelength of light emitted by a celestial body at rest, which is necessary for measuring its speed using the Doppler effect?"},
]
gpu_stats_before = nvidia_smi.get_gpu_stats()
inference = Inference(model_conf)
gpu_stats_loaded = nvidia_smi.get_gpu_stats()
t_start = time.time()
input_ids = inference.tokenize(messages, tokenize=True)
generated_tokens, full_output = inference.generate_batch(input_ids, print_stdout=False)
t_end = time.time()
gpu_stats_after = nvidia_smi.get_gpu_stats()
took = t_end - t_start
tokens = len(generated_tokens[0])
tokens_per = tokens / took
vram_bulk = gpu_stats_loaded["memory_used"] - gpu_stats_before["memory_used"]
vram_top = gpu_stats_after["memory_used"] - gpu_stats_loaded["memory_used"]
print("model: %s" % model_conf.model_name)
print("tokens: %d tk" % tokens)
print("time: %.3f s" % took)
print("speed: %.3f tk/s" % tokens_per)
print("vram_bulk: %d MB" % vram_bulk)
print("vram_top: %d MB" % vram_top)
print("context: %d tk" % inference.max_context_length)
print("")
def profile(model_conf):
try:
profile_ex(model_conf)
except Exception as e:
print("exception: " + str(e))
pass
def main():
profile(Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True))
profile(Modelconfig("unsloth/Llama-3.2-1B"))
profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True))
profile(Modelconfig("unsloth/llama-3-8b-bnb-4bit"))
# profile(Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True))
profile(Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit"))
profile(Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit"))
profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True))
profile(Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True))
profile(Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit"))
if __name__ == "__main__":
main()

46
chatbug/llama.py Normal file

@@ -0,0 +1,46 @@
import datetime
from chatbug.tool_helper import tool_list
from chatbug.tool_functions import register_dummy
from chatbug.inference import Inference
from chatbug.generation_loop import Terminal, msg
from chatbug import model_selection
register_dummy()
def initialize_config(inference: Inference) -> Terminal:
# systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
system_prompt = "Hold a casual conversation with the user. Keep responses short at max 5 sentences and on point. Answer using markdown to the user. When providing code examples, avoid comments which provide no additional information. Do not summarize."
current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
append_toolcalls = False
if append_toolcalls:
systemmessage = msg("system", system_prompt + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list))
else:
systemmessage = msg("system", system_prompt + "\n" + current_date_and_time)
terminal = Terminal(inference, systemmessage)
# system message for role flip so the model automatically answers for the user
terminal.roleflip = msg("system", "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye.")
# system messages and user message to bring the model to summarize the entire conversation
terminal.summarize = msg("system", "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly.")
terminal.summarize_user = msg("system", "Can you summarize the conversation?")
# system message to create a conversation title
terminal.title_prompt = msg("system", "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity.")
return terminal
def main_func():
inference = Inference(model_selection.get_model())
terminal = initialize_config(inference)
terminal.join()
if __name__ == "__main__":
main_func()

chatbug/matheval/__init__.py Normal file

@@ -0,0 +1,3 @@
from chatbug.matheval import ast
from chatbug.matheval import interpreter
from chatbug.matheval import lexer

chatbug/matheval/ast.py

@@ -1,6 +1,5 @@
-import math_lexer as lexer
-from math_lexer import Token
+from chatbug.matheval import lexer
+from chatbug.matheval.lexer import Token
 class Statement:

chatbug/matheval/interpreter.py

@@ -1,10 +1,11 @@
-import math_ast as ast
 from sympy.parsing.sympy_parser import parse_expr
 from sympy.core.numbers import Integer, One, Zero
 from sympy import symbols, Eq, solveset, linsolve, nonlinsolve
 from sympy.core.symbol import Symbol
+from chatbug.matheval import ast
 def interpret(statement: ast.Statement) -> str:

chatbug/model_selection.py Normal file

@@ -0,0 +1,95 @@
from chatbug.modelconfig import Modelconfig
def get_model() -> Modelconfig:
# model: NousResearch/Hermes-3-Llama-3.2-3B
# tokens: 315 tk
# time: 94.360 s
# speed: 3.338 tk/s
# vram_bulk: 3622 MB
# vram_top: 80 MB
# context: 131072 tk
# model = Modelconfig("NousResearch/Hermes-3-Llama-3.2-3B", load_in_8bit=True)
# model: unsloth/Llama-3.2-1B
# tokens: 589 tk
# time: 39.348 s
# speed: 14.969 tk/s
# vram_bulk: 4708 MB
# vram_top: 102 MB
# context: 131072 tk
# model = Modelconfig("unsloth/Llama-3.2-1B") # note, fast, but talks to itself. basically does not work.
# model: unsloth/Llama-3.2-3B-Instruct
# tokens: 285 tk
# time: 75.363 s
# speed: 3.782 tk/s
# vram_bulk: 3512 MB
# vram_top: 48 MB
# context: 131072 tk
# model = Modelconfig("unsloth/Llama-3.2-3B-Instruct", load_in_8bit=True)
# model: unsloth/llama-3-8b-bnb-4bit
# tokens: 435 tk
# time: 84.314 s
# speed: 5.159 tk/s
# vram_bulk: 5440 MB
# vram_top: 216 MB
# context: 8192 tk
# model = Modelconfig("unsloth/llama-3-8b-bnb-4bit")
# Model size: 3.21B params
# vram used: xxxxx MB
# speed xxxxx t/s
# working: DOES NOT LOAD
# model = Modelconfig("unsloth/Llama-3.2-3B-Instruct-GGUF", load_in_8bit=True)
# model: unsloth/gemma-2-9b-it-bnb-4bit
# tokens: 154 tk
# time: 32.727 s
# speed: 4.706 tk/s
# vram_bulk: 6156 MB
# vram_top: 232 MB
# context: 8192 tk
# model = Modelconfig("unsloth/gemma-2-9b-it-bnb-4bit")
# model: unsloth/Qwen2.5-7B-Instruct-bnb-4bit
# tokens: 120 tk
# time: 12.248 s
# speed: 9.798 tk/s
# vram_bulk: 5382 MB
# vram_top: 170 MB
# context: 32768 tk
model = Modelconfig("unsloth/Qwen2.5-7B-Instruct-bnb-4bit") # note, this works really good
# model: unsloth/Qwen2.5-3B-Instruct
# tokens: 112 tk
# time: 12.703 s
# speed: 8.816 tk/s
# vram_bulk: 2108 MB
# vram_top: 98 MB
# context: 32768 tk
# model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_4bit=True)
# model: unsloth/Qwen2.5-3B-Instruct
# tokens: 118 tk
# time: 33.748 s
# speed: 3.497 tk/s
# vram_bulk: 3310 MB
# vram_top: 60 MB
# context: 32768 tk
# model = Modelconfig("unsloth/Qwen2.5-3B-Instruct", load_in_8bit=True)
# Model size: 3.87B params
# vram used: xxxxx MB
# speed xxxxx t/s
# error: requires the protobuf library but it was not found in your environment
# model = Modelconfig("unsloth/mistral-7b-instruct-v0.3-bnb-4bit")
return model

20
chatbug/modelconfig.py Normal file

@@ -0,0 +1,20 @@
from transformers import BitsAndBytesConfig
import torch
class Modelconfig:
def __init__(self, model_name, bits_and_bytes_config=None, load_in_8bit=False, load_in_4bit=False):
self.model_name = model_name
if load_in_4bit:
assert bits_and_bytes_config == None
self.bits_and_bytes_config = BitsAndBytesConfig( # tool calls don't really work in 4 bit mode
load_in_4bit=True,
bnb_4bit_quant_type="nf4", # Recommended for better performance
bnb_4bit_use_double_quant=True, # Optional: Further quantization for more memory saving
bnb_4bit_compute_dtype=torch.bfloat16 # Use bfloat16 for computation
)
elif load_in_8bit:
assert bits_and_bytes_config == None
self.bits_and_bytes_config = BitsAndBytesConfig(load_in_8bit=True)
else:
self.bits_and_bytes_config = bits_and_bytes_config

chatbug/tool_functions.py

@@ -1,10 +1,8 @@
 import random
 import datetime
-from tool_helper import tool
-import math_lexer
-import math_ast
-import math_interpreter
-import utils
+from chatbug.tool_helper import tool
+import chatbug.matheval as matheval
+import chatbug.utils as utils
 # @tool
@@ -39,10 +37,10 @@ def math_evaluate(expression: str):
     Args:
         expression: A valid arithmetic expression (e.g., '2 + 3 * 4'). The expression must not contain '='."""
     try:
-        tokens = math_lexer.tokenize(expression)
-        parser = math_ast.Parser()
+        tokens = matheval.lexer.tokenize(expression)
+        parser = matheval.ast.Parser()
         ast = parser.parse(tokens)
-        return math_interpreter.interpret(ast)
+        return matheval.interpreter.interpret(ast)
     except Exception as e:
         utils.print_error("Tool call evaluation failed. - " + str(e))
         return "Tool call evaluation failed."
@@ -58,10 +56,10 @@ Args:
         expression = "solve " + " and ".join(equations) + " for " + " and ".join(variables)
         print(expression)
-        tokens = math_lexer.tokenize(expression)
-        parser = math_ast.Parser()
+        tokens = matheval.lexer.tokenize(expression)
+        parser = ast.Parser()
         ast = parser.parse(tokens)
-        return math_interpreter.interpret(ast)
+        return matheval.interpreter.interpret(ast)
     except Exception as e:
         utils.print_error("Tool call evaluation failed. - " + str(e))
         return "Tool call evaluation failed."

chatbug/tool_helper.py

@@ -2,7 +2,7 @@
 from typing import Callable, List, Optional
 import json
 import re
-import utils
+import chatbug.utils as utils
 tool_list = []

0
chatbug/ui/__init__.py Normal file

20
chatbug/ui/__main__.py Normal file

@@ -0,0 +1,20 @@
from .server import start_server
from .serverwait import wait_for_server
from .ui import start_ui, _start_sandboxed
def start_ui():
svr = start_server(start_thread=False)
url = f"http://localhost:{svr.port}"
# wait_for_server(url)
# # start_ui(threaded=False)
# import webview
# w = webview.create_window('asdf', '../../web/index.html', min_size=(1200, 900), zoomable=True)
# webview.start(ssl=True)
if __name__ == "__main__":
start_ui()

3771
chatbug/ui/bottle.py Normal file

File diff suppressed because it is too large

50
chatbug/ui/bottle_svr.py Normal file

@@ -0,0 +1,50 @@
#tornado needs this or it does not run
import asyncio
try:
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
except AttributeError:
print("Probably running on linux")
from bottle import route, run, response, static_file, request, post
from .file_watchdog import FileWatchdog
class BottleServer:
def __init__(self, listen="0.0.0.0", port=8080, start_thread=True, root="web"):
self.root = root
self.port = port
self.listen = listen
self.wdt = FileWatchdog(self.root)
if start_thread:
import threading
self.thread = threading.Thread(target=self._run, args=())
self.thread.name = "BottleServerThread"
self.thread.daemon = True
self.thread.start()
else:
self._run()
def _home(self):
return static_file("index.html", root= self.root)
def _watchdog(self):
return str(self.wdt.time)
def _files(self, name):
if name.endswith(".vue"):
return static_file(name, root= self.root, mimetype="text/html")
return static_file(name, root= self.root)
def _run(self):
route('/')(self._home)
route('/watchdog')(self._watchdog)
route('/<name:path>')(self._files)
print(f"Starting server at {self.listen}:{self.port}")
run(host=self.listen, port=self.port, debug=False, threaded=True, quiet=True)

chatbug/ui/file_watchdog.py Normal file

@@ -0,0 +1,47 @@
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class MyHandler(FileSystemEventHandler):
def __init__(self, function):
self.function = function
def on_any_event(self, _event):
# Handle the event (e.g., file created, modified, deleted)
self.function()
class FileWatchdog:
def __init__(self, path):
self.path = path
self.time = 0
event_handler = MyHandler(lambda: self.event_handler())
self.observer = Observer()
self.observer.schedule(event_handler, path, recursive=True)
self.observer.start()
def event_handler(self):
#print("change detected")
self.time = time.time()
def stop(self):
self.observer.stop()
if __name__ == "__main__":
wdt = FileWatchdog("./web")
try:
while True:
time.sleep(1)
print(wdt.time)
except KeyboardInterrupt:
wdt.stop()

10
chatbug/ui/server.py Normal file

@@ -0,0 +1,10 @@
from .bottle_svr import BottleServer
def start_server(start_thread=False):
print("server start")
return BottleServer(start_thread=start_thread, root="web")
if __name__ == "__main__":
start_server()

29
chatbug/ui/serverwait.py Normal file

@@ -0,0 +1,29 @@
import time
import requests
import socket
def wait_for_server(url, timeout=10, retry_interval=0.5):
"""
Waits for a web server to become available by polling its URL.
"""
start_time = time.monotonic()
while time.monotonic() - start_time < timeout:
try:
# First, try a simple TCP connection to check if the port is open
hostname, port = url.split("//")[1].split(":")
port = int(port)
with socket.create_connection((hostname, port), timeout=retry_interval):
pass # If the connection succeeds, continue to the HTTP check
# Then, make an HTTP request to ensure the server is responding correctly
response = requests.get(url, timeout=retry_interval)
response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
return # Server is up and responding correctly
except (requests.exceptions.RequestException, socket.error) as e:
print(f"Server not yet available: {e}. Retrying in {retry_interval} seconds...")
time.sleep(retry_interval)
raise TimeoutError(f"Server at {url} did not become available within {timeout} seconds.")

30
chatbug/ui/ui.py Normal file

@@ -0,0 +1,30 @@
import webview
from threading import Thread
def start_ui(threaded=False):
if threaded:
_start_threaded()
else:
_start_normal()
def _start_threaded():
t = Thread(target=start_ui, args=[False])
t.run()
def _start_normal():
webview.create_window('Geargenerator', 'http://localhost:8080', min_size=(1200, 900), zoomable=True)
webview.start()
def _start_sandboxed():
webview.create_window('Geargenerator', 'web_v2/geargenerator.html', min_size=(1200, 900), zoomable=True)
webview.start(ssl=True)
if __name__ == "__main__":
_start_sandboxed()
# start_ui(threaded=False)

175
llama.py

@@ -1,175 +0,0 @@
import time
import random
from tool_helper import tool_list, parse_and_execute_tool_call
from tool_functions import register_dummy
from inference import Inference, torch_reseed
import datetime
messages = []
inference = None
# systemmessage at the very begin of the chat. Will be concatenated with the automatic tool usage descriptions
systemmessage = "Hold a casual conversation with the user. Keep responses short at max 3 sentences. Answer using markdown to the user."
# system message for role flip so the model automatically answers for the user
roleflip = {"role": "system", "content": "Keep the conversation going, ask for more information on the subject. Keep messages short at max 1-2 sentences. Do not thank and say goodbye."}
# system messages and user message to bring the model to summarize the entire conversation
summarize = {"role": "system", "content": "Summarize the conversation as a single, cohesive paragraph. Avoid using any bullet points, numbers, or list formatting. Write in plain text with natural sentences that flow together seamlessly."}
summarize_user = {"role": "system", "content": "Can you summarize the conversation?"}
# system message to create a conversation title
title_prompt = {"role": "system", "content": "Please create a very short and descriptive title or label for this conversation. Maximum 2-5 words. Use only plain text, avoid numbering, special characters, or unnecessary formatting-focus on clarity and brevity."}
register_dummy()
def append_generate_chat(input_text: str, role="user"):
t_start = time.time()
# generate AI response
if input_text != None:
messages.append({"role": role, "content": input_text})
inputs = inference.tokenize(messages, tokenize=True)
outputs, out_text = inference.generate(inputs)
# append result to message history
messages.append({"role": "assistant", "content": out_text})
print("")
print("generation took %.3fs (%d tokens)" % (time.time() - t_start, len(outputs[0])))
# handle tool call and check if a tool call has happened.
tool_result = parse_and_execute_tool_call(out_text, tool_list)
if tool_result != None:
# tool call happened
tool_result = "<tool_response>%s</tool_response>" % tool_result
# depending on the chat template the tool response tags must or must not be passed. :(
append_generate_chat(tool_result, role="tool")
def main():
global messages
global inference
inference = Inference()
current_date_and_time = datetime.datetime.now().strftime("Current date is %Y-%m-%d and its %H:%M %p right now.")
messages = [{"role": "system", "content": systemmessage + "\n" + current_date_and_time + "\n" + inference.generate_tool_use_header(tool_list)}]
while True:
# print an input prompt to receive text or commands
input_text = input(">>> ")
print("")
if input_text.startswith("!"):
append_generate_chat("<tool_response>%s</tool_response>" % input_text[1:], role="tool")
# append_generate_chat("%s" % input_text[1:], role="tool") # depending on the chat template the tool response tags must or must not be passed. :(
elif input_text.startswith("/clear"):
print("clearing chat history")
start_msg = messages[0]
messages = [start_msg]
print("")
elif input_text.startswith("/history"):
history = inference.tokenize(messages, tokenize=False)
# history = tokenizer.apply_chat_template(messages, return_tensors="pt", tokenize=False, add_generation_prompt=False)
print(history)
elif input_text.startswith("/undo"):
if len(messages) > 2:
print("undo latest prompt")
messages = messages[:-2]
else:
print("cannot undo because there are not enough messages on history.")
print("")
elif input_text.startswith("/regen"):
if len(messages) >= 2:
print("regenerating message (not working)")
messages = messages[:-1]
seed = random.randint(0, 2**32 - 1) # Generate a random seed
torch_reseed(seed)
append_generate_chat(None)
else:
print("cannot regenerate because there are not enough messages on history.")
print("")
elif input_text.startswith("/more"):
append_generate_chat(None)
elif input_text.startswith("/file"):
filename = input_text[len("/file "):]
print("read '%s' for prompt:" % filename)
with open(filename, "r") as f:
content = f.read()
print(content)
append_generate_chat(content)
elif input_text.startswith("/auto"):
messages_backup = messages
messages = [roleflip]
for m in messages_backup:
role = m["role"]
content = m["content"]
if role == "user":
role = "assistant"
elif role == "assistant":
role = "user"
if role != "system":
messages.append({"role": role, "content": content})
append_generate_chat(None) # will automatically advance the conversation as 'user'
last_message = messages[-1]
last_message["role"] = "user"
messages = messages_backup + [last_message]
append_generate_chat(None) # 'regular' chatbot answer
elif input_text.startswith("/summarize"):
messages_temp = list(filter(lambda x: x["role"] != "system", messages))
messages_temp = [summarize] + messages_temp + [summarize_user] # copy dict in last instance
# messages_temp[-1]["role"] = "user"
input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="The conversation was about ")
generated_tokens, full_output = inference.generate(input_ids)
elif input_text.startswith("/title"):
messages_temp = list(filter(lambda x: x["role"] != "system", messages))
messages_temp = [title_prompt] + messages_temp #+ [dict(title)] # copy dict in last instance
messages_temp[-1]["role"] = "user"
input_ids = inference.tokenize(messages_temp, tokenize=True, assistant_prefix="Title: ")
generated_tokens, full_output = inference.generate(input_ids)
elif input_text.startswith("/help"):
print("!<prompt> answer as 'tool' in <tool_response> tags")
print("/clear clear chat history")
print("/undo undo latest prompt")
print("/regen regenerate the last message")
print("/more generate more additional information")
print("/file read prompt input from file")
print("/auto automatically advance conversation")
print("/summarize generate a summary of the chat")
print("/title generate a title of the chat")
print("/help print this message")
print("")
elif input_text.startswith("/"):
print("unknown command.")
else:
append_generate_chat(input_text)
if __name__ == "__main__":
main()

requirements.txt

@@ -1,3 +1,4 @@
 transformers
 accelerate
 bitsandbytes
+pytest

28
setup.py Normal file

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages
setup(
name='chatbug',
version='0.1.0',
description='A conversational AI chatbot',
author='Florin Tobler',
author_email='florin.tobler@hotmail.com',
packages=find_packages(exclude=["tests"]),
install_requires=[
'transformers',
'accelerate',
'bitsandbytes',
'pytest',
'pywebview',
],
entry_points={
'console_scripts': [
'chatbug=chatbug.llama:main_func',
# a^ b^ c^ d^
# a => the command line argument
# b => the package name
# c => the file name in the package (same as imports)
# d => the function to call
'chatbugui=chatbug.ui.__main__:start_ui',
],
},
)


@@ -1 +0,0 @@
-# empty


@@ -1,32 +1,20 @@
 import pytest
-import tests.helper as helper
+from tests import helper
 inference = None
-InferenceClass = None
 Tensor = None
 def prepare():
-    if InferenceClass == None:
-        test_import_inference_module_librarys()
-    if inference == None:
-        test_instantiate_inference_instance()
-def test_import_inference_module_librarys():
-    import inference
-    import torch
-    global InferenceClass
-    global Tensor
-    InferenceClass = inference.Inference
-    Tensor = torch.Tensor
-def test_instantiate_inference_instance():
-    if InferenceClass == None:
-        test_import_inference_module_librarys()
     global inference
-    inference = InferenceClass()
+    global Tensor
+    if inference == None:
+        from torch import Tensor as _Tensor
+        from chatbug.inference import Inference
+        from chatbug.model_selection import get_model
+        inference = Inference(get_model())
+        Tensor = _Tensor
 def test_tool_header_generation():


@@ -1,6 +1,6 @@
 import pytest
-import tool_helper
-import tests.helper as helper
+import chatbug.tool_helper as tool_helper
+from tests import helper


@@ -1,6 +1,6 @@
 import pytest
-import tool_functions
+import chatbug.tool_functions as tool_functions
+from tests import helper
 def test_math_evaluate_1():
@@ -28,6 +28,13 @@ def test_math_evaluate_5():
     result = tool_functions.math_evaluate("sin(pi/2) + cos(0)")
     assert result == "sin(pi/2) + cos(0) = 2"
+def test_math_evaluate_solve_a():
+    result = tool_functions.math_evaluate("solve 240=x*r+x*r^2+x*r^3+s and r=1.618 and s=5 for x, r, s")
+    assert result == "Solved equation system 240 = r**3*x + r**2*x + r*x + s, r = 1.61800000000000 and s = 5 for x=27.7393327937747=~27.739, r=1.61800000000000=~1.618 and s=5.00000000000000=~5.000."
+def test_math_evaluate_solve_b():
+    result = tool_functions.math_evaluate("solve 250=x+x*r+s and r=1.618 and s=0 for x, r, s")
+    assert result == "Solved equation system 250 = r*x + s + x, r = 1.61800000000000 and s = 0 for x=95.4927425515661=~95.493, r=1.61800000000000=~1.618 and s=0."
@@ -54,4 +61,3 @@ def test_math_solver_3b():
 def test_math_solver_4():
     result = tool_functions.math_evaluate("solve 2*x**3 + 3*y = 7 and x - y = 1 for x, y")
     assert result == "Solved equation system 2*x**3 + 3*y = 7 and x - y = 1 for x=~1.421 and y=~0.421."


@@ -1,7 +1,8 @@
 import pytest
-import tool_helper
+from chatbug import tool_helper
 from unittest import mock
-import tests.helper as helper
+from tests import helper
+import re
@@ -40,34 +41,34 @@ def test_match_and_extract_matching3_with_newline():
 def test_string_malformed_faulty():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_str("{json_content}", [])
         assert result == None
         print_error_mock.assert_called_once() # this will check if the mocked function on the context was called.
 def test_tool_call_json_1():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": 1, "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == "result_1_zwei"
         assert print_error_mock.call_count == 0
 def test_tool_call_json_2():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy2", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == "SOME_TEXT"
         assert print_error_mock.call_count == 0
 def test_tool_call_json_non_existing_call_check():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy_which_is_not_existing", "arguments": {"text": "some_text"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == None
         assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.
 def test_tool_call_json_wrong_arguments_check():
-    with mock.patch("utils.print_error") as print_error_mock:
+    with mock.patch("chatbug.utils.print_error") as print_error_mock:
         result = tool_helper._execute_tool_call_json({"name": "tool_dummy", "arguments": {"a": "must_be_an_int_but_is_string", "b": "zwei"}}, [helper.tool_dummy, helper.tool_dummy2])
         assert result == None
         assert print_error_mock.call_count == 1 # this will check if the mocked function on the context was called.
@@ -75,7 +76,6 @@ def test_tool_call_json_wrong_arguments_check():
 def test_regex_multiline():
-    import re
     pattern = r"<start>(.*)</end>"
     # The text to search (spanning multiple lines)

61
web/index.html Normal file

@@ -0,0 +1,61 @@
<!DOCTYPE html>
<html lang="en">
<head>
<!-- <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script> -->
<link rel="stylesheet" href="stylesheet.css">
<script src="alpine.min.js"></script>
<script src="main.js"></script>
<script src="watchdog.js"></script>
</head>
<body>
<div class="sidebar">
<h1>Chatbug 🪲</h1>
<div class="button">🐛 New Chat</div>
<div class="title">Today</div>
<div class="button">Building Web UI with Bottle & Alpine.js</div>
<div class="button">Coding in python</div>
<div class="title">Last Week</div>
<div class="title">Older</div>
</div>
<div class="mainarea">
<!-- <h1 x-data="{ message: 'I ❤️ Alpine' }" x-text="message"></h1> -->
<div class="message">
<div class="bubble">Hello world</div>
</div>
<div class="response">
<div class="">Hello! Nice to meet you. What's up?</div>
</div>
<div class="message">
<div class="bubble">ah, just holding an example conversation with you</div>
</div>
<div class="response">
<div class="">Got it! Fun stuff. What kind of projects are you working on these days?</div>
</div>
<div class="message">
<div class="bubble">LLM chatbot named chatbug 🪲</div>
</div>
<div class="response">
<div class="">Cool name! Chatbug sounds like a friendly one. How's it going?</div>
</div>
<div class="message">
<div class="bubble">making a web ui with bottle and alpinejs</div>
</div>
<div class="input">
<!-- toolbutton for tool submenu, normally hidden unless pressed -->
<div class="button">+</div>
<div class="tool list" style="display:none">
<div class="tool button">attach file</div>
<div class="tool button">regenerate</div>
<div class="tool button">undo</div>
</div>
<input type="text">
<!-- send -->
<div class="button"></div>
</div>
</div>
</body>
</html>

25
web/main.js Normal file

@@ -0,0 +1,25 @@
// import {createApp, ref, reactive} from 'vue';
// const app = createApp({
// data() {
// let msg = ref("hello world")
// try {
// msg.value = "" + pywebview.api
// } catch (e) {
// msg.value = "did not invoke " + e
// }
// window.msg = msg
// return {
// message: msg
// };
// }
// });
// app.mount('#app');

117
web/stylesheet.css Normal file

@@ -0,0 +1,117 @@
body {
background-color: black;
color: white;
font-family: Arial, Helvetica, sans-serif;
margin: 0px;
height: 100%;
}
.sidebar {
width: 250px;
background-color: #2a262a;
float: left;
height: 100%;
position: absolute;
}
.sidebar h1 {
margin: 20px;
}
.sidebar .title {
font-size: 8pt;
margin: 20px;
margin-top: 30px;
margin-bottom: 10px;
}
.sidebar .button {
margin-left: 10px;
margin-right: 10px;
padding: 10px;
border-radius: 10px;
}
.sidebar .button:hover {
background-color: #423a42;
}
.mainarea {
margin-left: 260px;
height: 100%;
position: absolute;
right: 0;
left: 0;
}
.message {
display: flex;
margin-left: 40px;
margin-right: 10px;
}
.bubble {
padding: 10px;
border-radius: 10px;
background-color: #416146;
margin-left: auto;
float: right;
position: relative;
}
.response {
display: flex;
margin: 30px;
position: relative;
}
.response::before {
content: '🪲';
position: absolute;
top: -4px;
left: -30px;
}
.input {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #2a262a;
border-radius: 10px;
width: 70%;
margin: auto;
position: absolute;
bottom: 40px;
}
.tool.list {
display: none;
background-color: #fff;
border: 1px solid #ccc;
position: absolute;
top: 100%;
left: 0;
z-index: 1;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
.tool.button {
cursor: pointer;
padding: 5px 10px;
margin: 5px;
}
.input input {
flex-grow: 1;
padding: 10px;
border: 0px solid #ccc;
background: none;
color: white;
}
.input input:focus {
outline: 0px solid black; /* Custom focus outline */
}

67
web/watchdog.js Normal file

@@ -0,0 +1,67 @@
wdt = {
last_wdt_time: 0,
watchdog_counter: 0
}
pollFileChange = () => {
setTimeout(() => {
wdt.watchdog_counter++
console.log(wdt.watchdog_counter)
if (wdt.watchdog_counter > 20) {
return
}
ajax({
type: "GET",
url: "/watchdog",
success: (data) => {
var time = Number(data)
if (wdt.last_wdt_time == 0) {
wdt.last_wdt_time = time
pollFileChange()
} else if (time > wdt.last_wdt_time) {
location.reload();
} else {
pollFileChange()
}
},
})
}, 10000)
}
function ajax(setting) {
if (typeof(shutdown) !== 'undefined') return
var request = new XMLHttpRequest();
request.open(setting.type, setting.url, true);
request.setRequestHeader('Content-Type', setting.dataType)
request.onload = function(data) {
if (typeof(shutdown) !== 'undefined') return
if (this.status >= 200 && this.status < 400) {
if (setting.success) {
setting.success(this.response)
}
} else {
if (setting.error) {
setting.error(this.response)
}
}
}
request.onerror = function(data) {
if (typeof(shutdown) !== 'undefined') return
if (setting.error) {
setting.error(data)
}
}
if (setting.data) {
request.send(setting.data)
} else {
request.send()
}
}
pollFileChange()