Compare commits

..

6 Commits

Author SHA1 Message Date
e76bb08dbd Included sync in build process again for good behavior.
Some checks failed
dR export statistics / Compile (push) Failing after 1m8s
2024-11-25 20:44:33 +01:00
60d83b3022 Made get rants async. 2024-11-25 20:39:06 +01:00
c44567e57c Restored accidentally deleted __main__ and __init__. 2024-11-25 20:28:59 +01:00
f309353665 Deleted pycache and added timeout for sync. 2024-11-25 20:27:39 +01:00
d6f15e3c46 Bugfix for handling receiving comments from rants that have no comments. This crashed before this fix. 2024-11-25 20:19:22 +01:00
a933009b66 Updated venv paths 2024-11-25 20:15:31 +01:00
8 changed files with 29 additions and 15 deletions

View File

@ -1,4 +1,4 @@
all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
all: build sync export_dataset export_stats merge_images export_statistics export_mentions
build:
pip install build
@ -8,7 +8,7 @@ build:
sync:
dr.sync
./venv/bin/dr.sync
clean:
-@rm -r export
@ -20,12 +20,12 @@ sync_excempt:
export_stats:
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
@echo "Exporting statisticts."
dr.stats_all
./venv/bin/dr.stats_all
export_dataset:
@echo "Make sure you have ran 'make sync' first."
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
dr.dataset > export/0_dataset.txt
./venv/bin/dr.dataset > export/0_dataset.txt
export_statistics:
@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
@ -41,6 +41,6 @@ export_mentions:
merge_images:
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
python merge_images.py
./venv/bin/python merge_images.py

Binary file not shown.

View File

@ -25,9 +25,15 @@ def timestamp_to_string(timestamp):
async def get_recent_rants(start_from=1, page_size=10):
loop = asyncio.get_running_loop()
page = 0
while True:
rants = dr.get_rants("recent", page_size, start_from)["rants"]
def get_rants():
return dr.get_rants("recent", page_size, start_from)["rants"]
rants = asyncio.wait_for(loop.run_in_executor(get_rants), 5)
page += 1
for rant in rants:
if rant is None:
@ -40,21 +46,29 @@ async def get_recent_rants(start_from=1, page_size=10):
start_from += page_size
async def _sync_rants(start_from, page_size,count):
async for rant in get_recent_rants(start_from, page_size):
start_from += page_size
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
return count
async def sync_rants():
count = 0
start_from = 0
page_size = 20
try:
async for rant in get_recent_rants(start_from, page_size):
while True:
try:
count += await asyncio.wait_for(_sync_rants(start_from, page_size,count),5)
start_from += page_size
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
except:
print("Rate limit of server exceeded. That's normal.s")
except Exception as ex:
print(ex)
print("If exception described above is an timeout related error, it's due ratelimiting and considered OK.")
break
async def sync_comments():
@ -62,7 +76,7 @@ async def sync_comments():
rants_synced = 0
for rant in db["rants"].find(order_by="-id"):
rants_synced += 1
comments = dr.get_rant(rant["id"])["comments"]
comments = dr.get_rant(rant["id"]).get("comments",[])
for comment in comments:
comments_synced += 1
comment["created"] = timestamp_to_string(comment["created_time"])