Compare commits

..

No commits in common. "e76bb08dbd7d391ff0d54b6f730fef7b6adf41a6" and "431337cdd9dde05601c59bb9ee8a2916de996bd0" have entirely different histories.

8 changed files with 15 additions and 29 deletions

View File

@ -1,4 +1,4 @@
all: build sync export_dataset export_stats merge_images export_statistics export_mentions all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
build: build:
pip install build pip install build
@ -8,7 +8,7 @@ build:
sync: sync:
./venv/bin/dr.sync dr.sync
clean: clean:
-@rm -r export -@rm -r export
@ -20,12 +20,12 @@ sync_excempt:
export_stats: export_stats:
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/" @echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
@echo "Exporting statisticts." @echo "Exporting statisticts."
./venv/bin/dr.stats_all dr.stats_all
export_dataset: export_dataset:
@echo "Make sure you have ran 'make sync' first." @echo "Make sure you have ran 'make sync' first."
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt" @echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
./venv/bin/dr.dataset > export/0_dataset.txt dr.dataset > export/0_dataset.txt
export_statistics: export_statistics:
@echo "Exporting statisticts. Result will be ./export/2_statistics.txt" @echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
@ -41,6 +41,6 @@ export_mentions:
merge_images: merge_images:
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png." @echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
./venv/bin/python merge_images.py python merge_images.py

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -25,15 +25,9 @@ def timestamp_to_string(timestamp):
async def get_recent_rants(start_from=1, page_size=10): async def get_recent_rants(start_from=1, page_size=10):
loop = asyncio.get_running_loop()
page = 0 page = 0
while True: while True:
rants = dr.get_rants("recent", page_size, start_from)["rants"]
def get_rants():
return dr.get_rants("recent", page_size, start_from)["rants"]
rants = asyncio.wait_for(loop.run_in_executor(get_rants), 5)
page += 1 page += 1
for rant in rants: for rant in rants:
if rant is None: if rant is None:
@ -46,29 +40,21 @@ async def get_recent_rants(start_from=1, page_size=10):
start_from += page_size start_from += page_size
async def _sync_rants(start_from, page_size,count):
async for rant in get_recent_rants(start_from, page_size):
start_from += page_size
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
return count
async def sync_rants(): async def sync_rants():
count = 0 count = 0
start_from = 0 start_from = 0
page_size = 20 page_size = 20
while True: try:
try: async for rant in get_recent_rants(start_from, page_size):
count += await asyncio.wait_for(_sync_rants(start_from, page_size,count),5)
start_from += page_size start_from += page_size
except Exception as ex: count += 1
print(ex) rant["tags"] = json.dumps(rant["tags"])
print("If exception described above is an timeout related error, it's due ratelimiting and considered OK.") db["rants"].upsert(rant, ["id"])
break print(f"Upserted {count} rant(s).")
except:
print("Rate limit of server exceeded. That's normal.s")
async def sync_comments(): async def sync_comments():
@ -76,7 +62,7 @@ async def sync_comments():
rants_synced = 0 rants_synced = 0
for rant in db["rants"].find(order_by="-id"): for rant in db["rants"].find(order_by="-id"):
rants_synced += 1 rants_synced += 1
comments = dr.get_rant(rant["id"]).get("comments",[]) comments = dr.get_rant(rant["id"])["comments"]
for comment in comments: for comment in comments:
comments_synced += 1 comments_synced += 1
comment["created"] = timestamp_to_string(comment["created_time"]) comment["created"] = timestamp_to_string(comment["created_time"])