2022-06-09 22:56:38 +00:00
|
|
|
import argparse
|
2022-07-21 15:55:51 +00:00
|
|
|
import asyncio
|
2022-06-09 22:56:38 +00:00
|
|
|
import sys
|
2022-07-04 20:11:44 +00:00
|
|
|
|
2022-06-09 22:56:38 +00:00
|
|
|
import dataset
|
2022-07-04 20:11:44 +00:00
|
|
|
import requests
|
|
|
|
import toml
|
|
|
|
import vk_api
|
|
|
|
from mastodon import Mastodon
|
2022-06-09 22:56:38 +00:00
|
|
|
|
2022-07-21 15:55:51 +00:00
|
|
|
import utils
|
|
|
|
|
2022-06-09 22:56:38 +00:00
|
|
|
# Command-line interface. Both options are mandatory: the config path is fed
# straight into toml.load() and the group name selects the Mastodon account
# and the VK wall, so running without either cannot succeed.
parser = argparse.ArgumentParser()
# required=True makes argparse stop with a usage message instead of letting a
# missing value travel further into the script as None.
parser.add_argument("-c", "--config", required=True, help="Config path")
parser.add_argument("-g", "--group", required=True, help="VK group to archive")
args = parser.parse_args()
|
|
|
|
|
|
|
|
# Read the TOML configuration file named on the command line.
config = toml.load(args.config)

# The "mastodon" table must contain an entry keyed by the requested group;
# without it there are no credentials to post with, so exit with status 1.
mastodon_accounts = config["mastodon"]
if mastodon_accounts.get(args.group) is None:
    print("invalid group")
    sys.exit(1)
|
|
|
|
|
2022-06-09 22:56:38 +00:00
|
|
|
# Mastodon client for the account configured under mastodon.<group>.
group_settings = config["mastodon"][args.group]
mastodon = Mastodon(
    access_token=group_settings["access_token"],
    api_base_url=group_settings["instance"],
)

# VK session authenticated with the token from the "vk" config table, plus
# the API object and the VkTools helper built on top of that session.
vk_token = config["vk"]["access_token"]
vk_session = vk_api.VkApi(token=vk_token)
vk = vk_session.get_api()

tools = vk_api.VkTools(vk_session)
|
|
|
|
|
|
|
|
# Persistent state lives in the SQLite file database.db:
#   last_post_count - one row per group holding a "count" value
#   uploaded_posts  - (group, post_id) pairs that were already posted
db = dataset.connect('sqlite:///database.db')
last_post_count_table = db['last_post_count']
uploaded_posts = db['uploaded_posts']

# None when this group has no row yet, i.e. it was never archived before.
group_last_post_count = last_post_count_table.find_one(group=args.group)

# Filled by the parsing step with the posts that still need uploading.
parsed_posts = []
|
|
|
|
|
|
|
|
print(f"Downloading list of posts in group {args.group}...")
if group_last_post_count is not None:
    # The group was archived before: fetch posts incrementally, passing the
    # stored count to the helper.
    # NOTE(review): the exact meaning of the count is defined by
    # utils.download_posts_incrementally - confirm there.
    last_post_count = group_last_post_count["count"]
    posts_raw = {
        "items": asyncio.run(
            utils.download_posts_incrementally(vk, args.group, last_post_count)
        )
    }
else:
    # First run for this group: download the full wall.
    # NOTE(review): 100 is presumably the per-request page size - confirm
    # against the VkTools.get_all signature.
    posts_raw = tools.get_all('wall.get', 100, {'domain': args.group})

# Both branches expose the post list under the "items" key.
posts = posts_raw["items"]
|
|
|
|
# Reduce every raw VK post to the fields the uploader needs, skipping posts
# the database already records as uploaded for this group.
for p in posts:
    if uploaded_posts.find_one(group=args.group, post_id=p["id"]) is not None:
        print(f"Post {p['id']} already has been uploaded, skipping it...")
        continue

    parsed_post = {
        "id": p["id"],
        "text": p["text"],
        "date": p["date"],
        "pinned": p.get("is_pinned") == 1,
        # URLs of the photos to re-upload; other attachment types are ignored.
        "attachments": [],
    }

    # A post without an "attachments" key simply contributes no media.
    for a in p.get("attachments") or []:
        if a["type"] != "photo":
            continue
        # Take the tallest rendition of the photo. max() leaves the API
        # response untouched and, like a stable descending sort, returns the
        # first entry among equally tall ones.
        largest = max(a["photo"]["sizes"], key=lambda e: e["height"], default=None)
        if largest is None:
            # An empty size list would otherwise abort the whole run.
            print(f"Photo in post {p['id']} has no sizes, skipping it...")
            continue
        parsed_post["attachments"].append(largest["url"])

    parsed_posts.append(parsed_post)

# Upload order: ascending by the post's "date" value.
parsed_posts.sort(key=lambda e: e["date"])
|
|
|
|
|
|
|
|
print("Uploading posts to the Fediverse...")
if group_last_post_count is None:
    # First run for this group: start its counter row from zero.
    group_last_post_count = {'count': 0, 'group': args.group}

try:
    for c, p in enumerate(parsed_posts, start=1):
        # Re-host every photo of the post on the Mastodon instance first.
        # NOTE(review): instances commonly cap the number of media per status;
        # posts with many photos may be rejected - confirm the desired handling.
        uploaded_media = []
        for i in p["attachments"]:
            # A timeout keeps one stalled download from hanging the run, and
            # raise_for_status() stops an HTTP error page from being uploaded
            # as if it were image data.
            resp = requests.get(i, timeout=60)
            resp.raise_for_status()
            m = mastodon.media_post(resp.content, mime_type='image/jpeg')
            uploaded_media.append(m)

        toot = mastodon.status_post(p["text"], media_ids=uploaded_media, visibility='public')
        if p['pinned']:
            mastodon.status_pin(toot['id'])

        # Record progress after every post so an interrupted run does not
        # upload the same post twice when restarted.
        uploaded_posts.insert({'group': args.group, 'post_id': p['id']})
        group_last_post_count['count'] += 1
        last_post_count_table.upsert(group_last_post_count, ['group'])

        print(f"Progress: {c}/{len(parsed_posts)}")
finally:
    # Close the database connection even when an upload fails midway.
    db.close()
|