diff --git a/default.env b/default.env
new file mode 100644
index 0000000..fbb50dd
--- /dev/null
+++ b/default.env
@@ -0,0 +1,58 @@
+# Discord Token
+THE_TOKEN = ""
+
+# The Channel IDs the bot will operate in, separated by commas
+CHANNEL_IDS =
+
+# The INIT prompt for all conversations.
+INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends one, via scraping."
+
+# Loading Embed Refresh Timing
+REFRESH_INTERVAL=2
+
+# When a message is too large for Discord we chunk the response into separate messages.
+# To ensure we do not rate limit the bot we send these at a delay interval.
+# DEFAULT: 3. A good setting is between 3 and 7 seconds.
+OVERFLOW_DELAY=3
+
+# Max content to fetch from given URLs
+MAX_CONTENT_LENGTH=2000
+
+# Max tokens for generations
+MAX_TOKENS = 1499
+
+# ROOT_IP is only used when running the bot without docker compose
+ROOT_IP = 127.0.0.1
+
+# ROOT_PORT is only used when running the bot without docker compose
+ROOT_PORT = 8000
+
+# Directory to your models (llama.cpp-specific setting)
+DATA_DIR = /Users/username/code/models
+
+# Enable experimental message caches (limited to a single session).
+# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
+CACHE = 1
+
+CACHE_TYPE = "disk"
+
+# Set the number of threads to use; currently, a standard thread will utilize 1 whole core.
+# I usually set this to all the cores I physically have, OR 2 cores fewer to allow for other processes.
+N_THREADS = 10
+
+# Always use MMAP unless you know what you are doing
+#USE_MMAP=1
+
+# Only use MLOCK if you know what it does!
+USE_MLOCK=0
+
+# The higher the number, the more strongly repetition is penalized.
+REPEAT_PENALTY=1
+
+# GPU-SPECIFIC SETTINGS BELOW
+
+GPU=0
+
+N_GPU_LAYERS=32
+
+PYTHONUNBUFFERED=1
diff --git a/llamabot.js b/llamabot.js
index 3d03203..a9279a0 100644
--- a/llamabot.js
+++ b/llamabot.js
@@ -453,7 +453,7 @@ async function generateResponse(conversation, message) {
   let tokenLength = totalTokens
 
   // Remove older conversations if necessary
-  const maxLength = 2000;
+  const maxLength = 1800;
   if (tokenLength > maxLength) {
     const diff = tokenLength - maxLength;
     let removedTokens = 0;
@@ -461,15 +461,17 @@
     // Iterate over the messages in reverse order
     for (let i = messagesCopy.length - 1; i >= 0; i--) {
       const message = messagesCopy[i];
-      const messageTokens = llamaTokenizer.encode([message]);
+      const messageTokens = countLlamaTokens(message);
 
       // Calculate the token length of the current message
-      const messageTokenLength = messageTokens.length;
+      const messageTokenLength = countLlamaTokens(messageTokens);
       // Remove the current message if it won't make the token length negative
       if (removedTokens + messageTokenLength <= diff) {
        messagesCopy.splice(i, 1);
        removedTokens += messageTokenLength;
+        console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
+        )
      } else {
        // Break the loop if removing the message would make the token length negative
        break;
      }
@@ -506,6 +508,7 @@
      if (time > 2) {
        await botMessage.delete()
        clearInterval(refreshInterval);
+        clearTimeout(timeout);
        botMessage = null;
      }
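The OVERFLOW_DELAY comment in default.env describes splitting long replies into several Discord messages and pacing them to avoid rate limits. The bot's actual chunking code is not part of this diff; the following is only a rough illustration of that behavior, assuming a discord.js channel object and Discord's 2,000-character per-message cap:

```js
// Hypothetical sketch of the overflow behavior described in default.env;
// not the bot's actual implementation.
const OVERFLOW_DELAY = Number(process.env.OVERFLOW_DELAY || 3); // seconds between chunks
const DISCORD_LIMIT = 2000;                                     // Discord's per-message character cap

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function sendChunked(channel, text) {
  for (let start = 0; start < text.length; start += DISCORD_LIMIT) {
    await channel.send(text.slice(start, start + DISCORD_LIMIT));
    // Pause between chunks so long replies do not trip Discord rate limits.
    if (start + DISCORD_LIMIT < text.length) await sleep(OVERFLOW_DELAY * 1000);
  }
}
```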
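In the llamabot.js hunks, the patch swaps direct `llamaTokenizer.encode(...)` calls for a `countLlamaTokens` helper whose definition is not shown here. A minimal sketch of what such a helper could look like, assuming llama-tokenizer-js and message objects with a `content` string field (both assumptions, not confirmed by the patch):

```js
// Hypothetical sketch only; the real countLlamaTokens is defined elsewhere in llamabot.js.
// Assumes llama-tokenizer-js and messages shaped like { role, content }.
import llamaTokenizer from 'llama-tokenizer-js';

function countLlamaTokens(messages) {
  // Accept a single message or an array of messages.
  const list = Array.isArray(messages) ? messages : [messages];
  let total = 0;
  for (const msg of list) {
    if (msg && typeof msg.content === 'string') {
      total += llamaTokenizer.encode(msg.content).length;
    }
  }
  return total;
}
```

Under that assumption, the `countLlamaTokens(messagesCopy)` call in the new console.log line would report the total token size of the remaining conversation history after each removal.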
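The `clearTimeout(timeout)` added in the last hunk pairs with the existing `clearInterval(refreshInterval)` so that no timer callback fires after the placeholder message is deleted. A sketch of the surrounding pattern, with names mirroring the diff but the rest assumed rather than taken from llamabot.js:

```js
// Hypothetical sketch of the loading-placeholder timers; only the clearTimeout line
// reflects the actual change in this diff, the rest is assumed context.
async function showLoadingPlaceholder(channel) {
  const refreshMs = Number(process.env.REFRESH_INTERVAL || 2) * 1000;
  let time = 0;
  let botMessage = await channel.send('Generating...');

  // Periodically age the placeholder while the model is still generating.
  const refreshInterval = setInterval(async () => {
    time += refreshMs / 1000;
    if (time > 2) {
      await botMessage.delete();
      clearInterval(refreshInterval); // stop the refresh loop
      clearTimeout(timeout);          // also cancel the pending watchdog (the new line in this diff)
      botMessage = null;
    }
  }, refreshMs);

  // Watchdog: stop refreshing if generation never finishes.
  const timeout = setTimeout(() => clearInterval(refreshInterval), 60000);
}
```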