From e008f29522d01d00386d7a9da277955ad1885706 Mon Sep 17 00:00:00 2001
From: randaller
Date: Mon, 20 Mar 2023 17:20:34 +0300
Subject: [PATCH] Create hf-chat-example.py

---
 hf-chat-example.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 hf-chat-example.py

diff --git a/hf-chat-example.py b/hf-chat-example.py
new file mode 100644
index 0000000..d0bae72
--- /dev/null
+++ b/hf-chat-example.py
@@ -0,0 +1,51 @@
+import llamahf
+import os
+
+# To save memory, run the model in bfloat16:
+# import torch
+# torch.set_default_dtype(torch.bfloat16)
+
+MODEL = 'decapoda-research/llama-7b-hf'
+# MODEL = 'decapoda-research/llama-13b-hf'
+# MODEL = 'decapoda-research/llama-30b-hf'
+# MODEL = 'decapoda-research/llama-65b-hf'
+
+# Prefer a locally fine-tuned checkpoint when one exists
+if os.path.exists('./trained'):
+    MODEL = './trained'
+
+tokenizer = llamahf.LLaMATokenizer.from_pretrained(MODEL)
+model = llamahf.LLaMAForCausalLM.from_pretrained(MODEL, low_cpu_mem_usage=True)
+model.to('cpu')
+
+# Token id of '\n': generation stops once the AI finishes its line
+newline_token = tokenizer.encode('\n')[-1]
+
+ctx = """A dialog, where User interacts with AI. AI is helpful, kind, obedient, honest, and knows its own limits.
+User: Hello, AI.
+AI: Hello! How can I assist you today?
+"""
+
+while True:
+    print(ctx)
+    prompt = input('User: ')
+    # Append the new user turn to the running transcript
+    ctx = ctx + "User: " + prompt + "\n"
+
+    # Crude character-based truncation to keep the prompt inside the
+    # 2048-token context window (characters only roughly track tokens)
+    ctx = ctx[-1920:] if len(ctx) >= 2048 else ctx
+
+    if len(ctx.strip()) > 0:
+        batch = tokenizer(ctx, return_tensors="pt")
+        result = model.generate(batch["input_ids"].cpu(),
+                                do_sample=True,
+                                top_k=50,
+                                max_length=2048,
+                                top_p=0.95,
+                                temperature=1.0,
+                                eos_token_id=newline_token
+                                )
+        # The output contains the whole dialog so far; it becomes the new context
+        decoded = tokenizer.decode(result[0], skip_special_tokens=True)
+        ctx = decoded + "\n"
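
As an alternative to switching the global default dtype (the commented-out
lines near the top of the script), the weights can be loaded in bfloat16 at
load time. A minimal sketch, assuming the llamahf fork forwards transformers'
standard torch_dtype keyword to from_pretrained (an assumption; the patch
itself only shows the global-default approach):

    import torch
    import llamahf

    # Assumption: llamahf mirrors transformers' from_pretrained signature, so
    # torch_dtype loads the weights directly in bfloat16 (about half the
    # memory of float32) without touching the global default dtype.
    model = llamahf.LLaMAForCausalLM.from_pretrained(
        'decapoda-research/llama-7b-hf',
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
    )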
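
The ctx[-1920:] slice in the loop truncates the transcript by characters,
which only approximates the 2048-token window. A sketch of token-level
truncation using only standard tokenizer calls; truncate_ctx is a hypothetical
helper, not part of the patch:

    # Hypothetical helper: keep only the most recent max_tokens tokens of the
    # transcript, so the prompt is guaranteed to fit the context window.
    def truncate_ctx(tokenizer, text, max_tokens=1920):
        ids = tokenizer.encode(text)
        if len(ids) <= max_tokens:
            return text
        # Re-decoding may normalize whitespace slightly; acceptable for chat.
        return tokenizer.decode(ids[-max_tokens:], skip_special_tokens=True)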