Implement bot polling new posts in groups
This commit is contained in:
parent
497ead82d6
commit
1eb3ec8680
67
archive.py
67
archive.py
@ -1,4 +1,5 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import dataset
|
import dataset
|
||||||
@ -7,6 +8,8 @@ import toml
|
|||||||
import vk_api
|
import vk_api
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("-c", "--config", help="Config path")
|
parser.add_argument("-c", "--config", help="Config path")
|
||||||
parser.add_argument("-g", "--group", help="VK group to archive")
|
parser.add_argument("-g", "--group", help="VK group to archive")
|
||||||
@ -28,7 +31,6 @@ vk = vk_session.get_api()
|
|||||||
|
|
||||||
tools = vk_api.VkTools(vk_session)
|
tools = vk_api.VkTools(vk_session)
|
||||||
|
|
||||||
print(f"Downloading list of posts in group {args.group}...")
|
|
||||||
parsed_posts = []
|
parsed_posts = []
|
||||||
|
|
||||||
db = dataset.connect('sqlite:///database.db')
|
db = dataset.connect('sqlite:///database.db')
|
||||||
@ -38,61 +40,36 @@ uploaded_posts = db['uploaded_posts']
|
|||||||
group_last_post_count = last_post_count_table.find_one(group=args.group)
|
group_last_post_count = last_post_count_table.find_one(group=args.group)
|
||||||
|
|
||||||
posts_raw = {}
|
posts_raw = {}
|
||||||
if group_last_post_count == None:
|
print(f"Downloading list of posts in group {args.group}...")
|
||||||
|
if group_last_post_count is None:
|
||||||
# download full wall
|
# download full wall
|
||||||
posts_raw = tools.get_all('wall.get', 100, {'domain': args.group})
|
posts_raw = tools.get_all('wall.get', 100, {'domain': args.group})
|
||||||
else:
|
else:
|
||||||
# download only neccessary posts from vk
|
# download only necessary posts from vk
|
||||||
posts_raw["items"] = []
|
|
||||||
last_post_count = group_last_post_count["count"]
|
last_post_count = group_last_post_count["count"]
|
||||||
p_tmp = vk.wall.get(domain=args.group, count=1)
|
posts_raw["items"] = asyncio.run(utils.download_posts_incrementally(vk, args.group, last_post_count))
|
||||||
has_pinned_post = p_tmp["items"][0].get("is_pinned") == 1
|
|
||||||
current_count = p_tmp["count"]
|
|
||||||
if current_count == last_post_count:
|
|
||||||
print("Nothing to do, quitting...")
|
|
||||||
sys.exit(0)
|
|
||||||
posts_raw["count"] = current_count
|
|
||||||
download_count = current_count - last_post_count
|
|
||||||
download_offset = 0
|
|
||||||
if has_pinned_post:
|
|
||||||
# skip pinned post, cuz it appears first in the list
|
|
||||||
download_offset += 1
|
|
||||||
while download_count > 0:
|
|
||||||
to_download = 0
|
|
||||||
if download_count - 100 < 0:
|
|
||||||
to_download = download_count
|
|
||||||
download_count = 0
|
|
||||||
else:
|
|
||||||
to_download = 100
|
|
||||||
download_count -= 100
|
|
||||||
download_offset += 100
|
|
||||||
posts_raw_tmp = vk.wall.get(domain=args.group, offset=download_offset, count=to_download)
|
|
||||||
posts_raw["items"].extend(posts_raw_tmp["items"])
|
|
||||||
last_post_count += len(posts_raw_tmp["items"])
|
|
||||||
posts = posts_raw["items"]
|
posts = posts_raw["items"]
|
||||||
for p in posts:
|
for p in posts:
|
||||||
attachments = p.get("attachments")
|
if uploaded_posts.find_one(group=args.group, post_id=p["id"]) is not None:
|
||||||
if attachments == None:
|
print(f"Post {p['id']} already has been uploaded, skipping it...")
|
||||||
continue
|
continue
|
||||||
parsed_post = {}
|
|
||||||
parsed_post["id"] = p["id"]
|
attachments = p.get("attachments")
|
||||||
parsed_post["text"] = p["text"]
|
parsed_post = {"id": p["id"], "text": p["text"], "date": p["date"], "pinned": p.get("is_pinned") == 1,
|
||||||
parsed_post["date"] = p["date"]
|
"attachments": []}
|
||||||
parsed_post["pinned"] = p.get("is_pinned") == 1
|
if attachments is not None:
|
||||||
parsed_post["attachments"] = []
|
for a in attachments:
|
||||||
for a in attachments:
|
if a["type"] == "photo":
|
||||||
if a["type"] == "photo":
|
# get the biggest resolution of the photo
|
||||||
# get the biggest resolution of the photo
|
a["photo"]["sizes"].sort(key=lambda e: e["height"], reverse=True)
|
||||||
a["photo"]["sizes"].sort(key=lambda e: e["height"], reverse=True)
|
parsed_post["attachments"].append(a["photo"]["sizes"][0]["url"])
|
||||||
parsed_post["attachments"].append(a["photo"]["sizes"][0]["url"])
|
|
||||||
parsed_posts.append(parsed_post)
|
parsed_posts.append(parsed_post)
|
||||||
|
|
||||||
parsed_posts.sort(key=lambda e: e["date"])
|
parsed_posts.sort(key=lambda e: e["date"])
|
||||||
|
|
||||||
print("Uploading posts to the Fediverse...")
|
print("Uploading posts to the Fediverse...")
|
||||||
if group_last_post_count == None:
|
if group_last_post_count is None:
|
||||||
group_last_post_count = {}
|
group_last_post_count = {'count': 0, 'group': args.group}
|
||||||
group_last_post_count['count'] = 0
|
|
||||||
c = 0
|
c = 0
|
||||||
for p in parsed_posts:
|
for p in parsed_posts:
|
||||||
uploaded_media = []
|
uploaded_media = []
|
||||||
@ -108,3 +85,5 @@ for p in parsed_posts:
|
|||||||
last_post_count_table.upsert(group_last_post_count, ['group'])
|
last_post_count_table.upsert(group_last_post_count, ['group'])
|
||||||
c += 1
|
c += 1
|
||||||
print(f"Progress: {c}/{len(parsed_posts)}")
|
print(f"Progress: {c}/{len(parsed_posts)}")
|
||||||
|
|
||||||
|
db.close()
|
||||||
|
109
bot.py
Normal file
109
bot.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
import dataset
|
||||||
|
import requests
|
||||||
|
import toml
|
||||||
|
import vk_api
|
||||||
|
from mastodon import Mastodon
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("-c", "--config", help="Config path")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
config = toml.load(args.config)
|
||||||
|
|
||||||
|
vk_session = vk_api.VkApi(token=config["vk"]["access_token"])
|
||||||
|
vk = vk_session.get_api()
|
||||||
|
|
||||||
|
mastodon_clients = {}
|
||||||
|
bots_longpoll = {}
|
||||||
|
bot_threads = {}
|
||||||
|
q = queue.Queue()
|
||||||
|
|
||||||
|
db = dataset.connect('sqlite:///database.db')
|
||||||
|
uploaded_posts = db['uploaded_posts']
|
||||||
|
|
||||||
|
|
||||||
|
def bot_loop():
|
||||||
|
while True:
|
||||||
|
# get new post from queue
|
||||||
|
post_chunk = q.get(block=True, timeout=None)
|
||||||
|
|
||||||
|
m = mastodon_clients.get(post_chunk["group"])
|
||||||
|
if m is None:
|
||||||
|
print(f"couldn't find corresponding mastodon client for group {post_chunk['group']}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for post in post_chunk["items"]:
|
||||||
|
attachments = post.get("attachments")
|
||||||
|
|
||||||
|
parsed_post = {"id": post["id"], "text": post["text"], "date": post["date"],
|
||||||
|
"pinned": post.get("is_pinned") == 1,
|
||||||
|
"attachments": []}
|
||||||
|
if attachments is not None:
|
||||||
|
for a in attachments:
|
||||||
|
if a["type"] == "photo":
|
||||||
|
# get the biggest resolution of the photo
|
||||||
|
a["photo"]["sizes"].sort(key=lambda e: e["height"], reverse=True)
|
||||||
|
parsed_post["attachments"].append(a["photo"]["sizes"][0]["url"])
|
||||||
|
|
||||||
|
uploaded_media = []
|
||||||
|
for i in parsed_post["attachments"]:
|
||||||
|
resp = requests.get(i)
|
||||||
|
m = m.media_post(resp.content, mime_type='image/jpeg')
|
||||||
|
uploaded_media.append(m)
|
||||||
|
toot = m.status_post(parsed_post["text"], media_ids=uploaded_media, visibility='public')
|
||||||
|
if parsed_post['pinned']:
|
||||||
|
m.status_pin(toot['id'])
|
||||||
|
uploaded_posts.insert({'group': post_chunk["group"], 'post_id': post['id']})
|
||||||
|
|
||||||
|
group_last_post_count = db['last_post_count'].find_one(group=post_chunk["group"])
|
||||||
|
if group_last_post_count is None:
|
||||||
|
group_last_post_count = {'count': 0, 'group': post_chunk["group"]} # FIXME this shouldn't happen
|
||||||
|
group_last_post_count['count'] += 1
|
||||||
|
db['last_post_count'].upsert(group_last_post_count, ['group'])
|
||||||
|
print(f"Uploaded post {post['id']} for group {post_chunk['group']} successfully!")
|
||||||
|
|
||||||
|
|
||||||
|
async def listen_new_posts():
|
||||||
|
tasks = []
|
||||||
|
for group in config["mastodon"]:
|
||||||
|
group_last_post_count = db['last_post_count'].find_one(group=group)
|
||||||
|
if group_last_post_count is None:
|
||||||
|
group_last_post_count = {'count': 0,
|
||||||
|
'group': group} # FIXME need to execute archive.py code for full downloading of wall on new group
|
||||||
|
# FIXME this shouldn't happen
|
||||||
|
tasks.append(utils.download_posts_incrementally(vk, group, group_last_post_count['count']))
|
||||||
|
new_post_chunks = await asyncio.gather(*tasks)
|
||||||
|
for chunk in new_post_chunks:
|
||||||
|
if len(chunk["items"]) > 0:
|
||||||
|
q.put(chunk, block=True, timeout=None)
|
||||||
|
|
||||||
|
for k in config["mastodon"]:
|
||||||
|
mastodon_clients[k] = Mastodon(
|
||||||
|
access_token=config["mastodon"][k]["access_token"],
|
||||||
|
api_base_url=config["mastodon"][k]["instance"]
|
||||||
|
)
|
||||||
|
print(k)
|
||||||
|
|
||||||
|
t = threading.Thread(target=bot_loop)
|
||||||
|
t.start()
|
||||||
|
bot_threads[k] = t
|
||||||
|
|
||||||
|
print("Bot has been set up, listening events...")
|
||||||
|
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
asyncio.run(listen_new_posts())
|
||||||
|
time.sleep(5)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
db.close()
|
||||||
|
break
|
||||||
|
|
30
utils.py
Normal file
30
utils.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from vk_api import VkApi
|
||||||
|
|
||||||
|
|
||||||
|
async def download_posts_incrementally(vk: VkApi, group_domain: str, last_post_count: int):
|
||||||
|
posts_raw = {"items": [], "group": group_domain}
|
||||||
|
p_tmp = vk.wall.get(domain=group_domain, count=1)
|
||||||
|
has_pinned_post = False
|
||||||
|
if len(p_tmp["items"]) > 0:
|
||||||
|
has_pinned_post = p_tmp["items"][0].get("is_pinned") == 1
|
||||||
|
current_count = p_tmp["count"]
|
||||||
|
if current_count == last_post_count:
|
||||||
|
return posts_raw
|
||||||
|
download_count = current_count - last_post_count
|
||||||
|
download_offset = 0
|
||||||
|
if has_pinned_post:
|
||||||
|
# skip pinned post, cuz it appears first in the list
|
||||||
|
download_offset += 1
|
||||||
|
while download_count > 0:
|
||||||
|
to_download = 0
|
||||||
|
if download_count - 100 < 0:
|
||||||
|
to_download = download_count
|
||||||
|
download_count = 0
|
||||||
|
else:
|
||||||
|
to_download = 100
|
||||||
|
download_count -= 100
|
||||||
|
download_offset += 100
|
||||||
|
posts_raw_tmp = vk.wall.get(domain=group_domain, offset=download_offset, count=to_download)
|
||||||
|
posts_raw["items"].extend(posts_raw_tmp["items"])
|
||||||
|
last_post_count += len(posts_raw_tmp["items"])
|
||||||
|
return posts_raw
|
Loading…
Reference in New Issue
Block a user