Compare commits
No commits in common. "e76bb08dbd7d391ff0d54b6f730fef7b6adf41a6" and "431337cdd9dde05601c59bb9ee8a2916de996bd0" have entirely different histories.
e76bb08dbd
...
431337cdd9
10
Makefile
10
Makefile
@ -1,4 +1,4 @@
|
|||||||
all: build sync export_dataset export_stats merge_images export_statistics export_mentions
|
all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
|
||||||
|
|
||||||
build:
|
build:
|
||||||
pip install build
|
pip install build
|
||||||
@ -8,7 +8,7 @@ build:
|
|||||||
|
|
||||||
|
|
||||||
sync:
|
sync:
|
||||||
./venv/bin/dr.sync
|
dr.sync
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-@rm -r export
|
-@rm -r export
|
||||||
@ -20,12 +20,12 @@ sync_excempt:
|
|||||||
export_stats:
|
export_stats:
|
||||||
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
|
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
|
||||||
@echo "Exporting statisticts."
|
@echo "Exporting statisticts."
|
||||||
./venv/bin/dr.stats_all
|
dr.stats_all
|
||||||
|
|
||||||
export_dataset:
|
export_dataset:
|
||||||
@echo "Make sure you have ran 'make sync' first."
|
@echo "Make sure you have ran 'make sync' first."
|
||||||
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
||||||
./venv/bin/dr.dataset > export/0_dataset.txt
|
dr.dataset > export/0_dataset.txt
|
||||||
|
|
||||||
export_statistics:
|
export_statistics:
|
||||||
@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
|
@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
|
||||||
@ -41,6 +41,6 @@ export_mentions:
|
|||||||
|
|
||||||
merge_images:
|
merge_images:
|
||||||
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
||||||
./venv/bin/python merge_images.py
|
python merge_images.py
|
||||||
|
|
||||||
|
|
||||||
|
BIN
drstats.db
BIN
drstats.db
Binary file not shown.
BIN
src/drstats/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
src/drstats/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/drstats/__pycache__/db.cpython-312.pyc
Normal file
BIN
src/drstats/__pycache__/db.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/drstats/__pycache__/devrant.cpython-312.pyc
Normal file
BIN
src/drstats/__pycache__/devrant.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/drstats/__pycache__/statistics.cpython-312.pyc
Normal file
BIN
src/drstats/__pycache__/statistics.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/drstats/__pycache__/sync.cpython-312.pyc
Normal file
BIN
src/drstats/__pycache__/sync.cpython-312.pyc
Normal file
Binary file not shown.
@ -25,15 +25,9 @@ def timestamp_to_string(timestamp):
|
|||||||
|
|
||||||
|
|
||||||
async def get_recent_rants(start_from=1, page_size=10):
|
async def get_recent_rants(start_from=1, page_size=10):
|
||||||
loop = asyncio.get_running_loop()
|
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
|
rants = dr.get_rants("recent", page_size, start_from)["rants"]
|
||||||
def get_rants():
|
|
||||||
return dr.get_rants("recent", page_size, start_from)["rants"]
|
|
||||||
|
|
||||||
rants = asyncio.wait_for(loop.run_in_executor(get_rants), 5)
|
|
||||||
|
|
||||||
page += 1
|
page += 1
|
||||||
for rant in rants:
|
for rant in rants:
|
||||||
if rant is None:
|
if rant is None:
|
||||||
@ -46,29 +40,21 @@ async def get_recent_rants(start_from=1, page_size=10):
|
|||||||
start_from += page_size
|
start_from += page_size
|
||||||
|
|
||||||
|
|
||||||
async def _sync_rants(start_from, page_size,count):
|
|
||||||
async for rant in get_recent_rants(start_from, page_size):
|
|
||||||
start_from += page_size
|
|
||||||
count += 1
|
|
||||||
rant["tags"] = json.dumps(rant["tags"])
|
|
||||||
db["rants"].upsert(rant, ["id"])
|
|
||||||
print(f"Upserted {count} rant(s).")
|
|
||||||
return count
|
|
||||||
|
|
||||||
async def sync_rants():
|
async def sync_rants():
|
||||||
count = 0
|
count = 0
|
||||||
start_from = 0
|
start_from = 0
|
||||||
|
|
||||||
page_size = 20
|
page_size = 20
|
||||||
|
|
||||||
while True:
|
try:
|
||||||
try:
|
async for rant in get_recent_rants(start_from, page_size):
|
||||||
count += await asyncio.wait_for(_sync_rants(start_from, page_size,count),5)
|
|
||||||
start_from += page_size
|
start_from += page_size
|
||||||
except Exception as ex:
|
count += 1
|
||||||
print(ex)
|
rant["tags"] = json.dumps(rant["tags"])
|
||||||
print("If exception described above is an timeout related error, it's due ratelimiting and considered OK.")
|
db["rants"].upsert(rant, ["id"])
|
||||||
break
|
print(f"Upserted {count} rant(s).")
|
||||||
|
except:
|
||||||
|
print("Rate limit of server exceeded. That's normal.s")
|
||||||
|
|
||||||
|
|
||||||
async def sync_comments():
|
async def sync_comments():
|
||||||
@ -76,7 +62,7 @@ async def sync_comments():
|
|||||||
rants_synced = 0
|
rants_synced = 0
|
||||||
for rant in db["rants"].find(order_by="-id"):
|
for rant in db["rants"].find(order_by="-id"):
|
||||||
rants_synced += 1
|
rants_synced += 1
|
||||||
comments = dr.get_rant(rant["id"]).get("comments",[])
|
comments = dr.get_rant(rant["id"])["comments"]
|
||||||
for comment in comments:
|
for comment in comments:
|
||||||
comments_synced += 1
|
comments_synced += 1
|
||||||
comment["created"] = timestamp_to_string(comment["created_time"])
|
comment["created"] = timestamp_to_string(comment["created_time"])
|
||||||
|
Loading…
Reference in New Issue
Block a user