Update tokenizer.py

This commit is contained in:
retoor 2025-01-05 15:00:38 +00:00
parent 03fa14a1c7
commit 533f5e23d1

View File

@@ -1,4 +1,3 @@
# I saved this script as a gist because I have rewritten it many times.
# It supports remembering line numbers and similar details, although that is currently unused.
# It was originally written in C by me and ported to Python.
@@ -59,10 +58,7 @@ def process_file(file_path):
alinia = 1
words = {}
with open(file_path, 'r') as f:
while True:
c = f.read(1)
if not c:
break
while c := f.read(1):
pos += 1
valid = True