Compare commits

..

No commits in common. "e76bb08dbd7d391ff0d54b6f730fef7b6adf41a6" and "431337cdd9dde05601c59bb9ee8a2916de996bd0" have entirely different histories.

8 changed files with 15 additions and 29 deletions

View File

@@ -1,4 +1,4 @@
all: build sync export_dataset export_stats merge_images export_statistics export_mentions
all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
build:
pip install build
@@ -8,7 +8,7 @@ build:
sync:
./venv/bin/dr.sync
dr.sync
clean:
-@rm -r export
@@ -20,12 +20,12 @@ sync_excempt:
export_stats:
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
@echo "Exporting statisticts."
./venv/bin/dr.stats_all
dr.stats_all
export_dataset:
@echo "Make sure you have ran 'make sync' first."
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
./venv/bin/dr.dataset > export/0_dataset.txt
dr.dataset > export/0_dataset.txt
export_statistics:
@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
@@ -41,6 +41,6 @@ export_mentions:
merge_images:
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
./venv/bin/python merge_images.py
python merge_images.py

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -25,15 +25,9 @@ def timestamp_to_string(timestamp):
async def get_recent_rants(start_from=1, page_size=10):
loop = asyncio.get_running_loop()
page = 0
while True:
def get_rants():
return dr.get_rants("recent", page_size, start_from)["rants"]
rants = asyncio.wait_for(loop.run_in_executor(get_rants), 5)
rants = dr.get_rants("recent", page_size, start_from)["rants"]
page += 1
for rant in rants:
if rant is None:
@@ -46,29 +40,21 @@ async def get_recent_rants(start_from=1, page_size=10):
start_from += page_size
async def _sync_rants(start_from, page_size,count):
async for rant in get_recent_rants(start_from, page_size):
start_from += page_size
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
return count
async def sync_rants():
count = 0
start_from = 0
page_size = 20
while True:
try:
count += await asyncio.wait_for(_sync_rants(start_from, page_size,count),5)
async for rant in get_recent_rants(start_from, page_size):
start_from += page_size
except Exception as ex:
print(ex)
print("If exception described above is an timeout related error, it's due ratelimiting and considered OK.")
break
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
except:
print("Rate limit of server exceeded. That's normal.s")
async def sync_comments():
@@ -76,7 +62,7 @@ async def sync_comments():
rants_synced = 0
for rant in db["rants"].find(order_by="-id"):
rants_synced += 1
comments = dr.get_rant(rant["id"]).get("comments",[])
comments = dr.get_rant(rant["id"])["comments"]
for comment in comments:
comments_synced += 1
comment["created"] = timestamp_to_string(comment["created_time"])