From 28921b5efa59af594bbff61208043605dd3882c7 Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Mon, 26 Oct 2020 03:05:14 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20First=20commit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitattributes | 2 + .gitignore | 11 +++ README.md | 3 + bbbdl/__main__.py | 20 +++++ bbbdl/composer.py | 24 +++++ bbbdl/errors.py | 6 ++ bbbdl/resources.py | 69 +++++++++++++++ bbbdl/urlhandler.py | 12 +++ poetry.lock | 209 ++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 24 +++++ 10 files changed, 380 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 README.md create mode 100644 bbbdl/__main__.py create mode 100644 bbbdl/composer.py create mode 100644 bbbdl/errors.py create mode 100644 bbbdl/resources.py create mode 100644 bbbdl/urlhandler.py create mode 100644 poetry.lock create mode 100644 pyproject.toml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..51ef61a --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Aulavirtuale-dl ignores +VideoLezioni/ + +# Python ignores +**/__pycache__/ +dist/ +*.egg-info/ +**/*.pyc + +# PyCharm ignores +.idea/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..f3ade08 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# bbbdl + +A tool for downloading BigBlueButton meetings diff --git a/bbbdl/__main__.py b/bbbdl/__main__.py new file mode 100644 index 0000000..1579766 --- /dev/null +++ b/bbbdl/__main__.py @@ -0,0 +1,20 @@ +import ffmpeg +import click +from .resources import Meeting +from .composer import compose_lesson + + +@click.command() +@click.option("-i", "--input-url", type=str, prompt=True, + help="The URL of the meeting to download.") +@click.option("-o", "--output-file", type=str, prompt=True, + help="The file the video should be written to.") +def download(input_url, output_file): + meeting = Meeting.from_url(input_url) + streams = compose_lesson(meeting) + output = ffmpeg.output(*streams, output_file) + output.run() + + +if __name__ == "__main__": + download() diff --git a/bbbdl/composer.py b/bbbdl/composer.py new file mode 100644 index 0000000..32250d5 --- /dev/null +++ b/bbbdl/composer.py @@ -0,0 +1,24 @@ +from typing import * +import ffmpeg +from .resources import Meeting + + +def compose_screensharing(meeting: Meeting) -> Tuple[ffmpeg.Stream, ffmpeg.Stream]: + """Keep the deskshare video and the webcam audio, while discarding the rest.""" + + return ( + meeting.deskshare.as_stream().video, + meeting.webcams.as_stream().audio, + ) + + +def compose_lesson(meeting: Meeting) -> Tuple[ffmpeg.Stream, ffmpeg.Stream]: + """Keep slides, deskshare video and webcam audio, while discarding the rest.""" + + video_stream, audio_stream = compose_screensharing(meeting) + + for shape in meeting.shapes: + video_stream = ffmpeg.overlay(video_stream, shape.resource.as_stream().video, + enable=f"between(t, {shape.start}, {shape.end})") + + return video_stream, audio_stream diff --git a/bbbdl/errors.py b/bbbdl/errors.py new file mode 100644 index 0000000..91e8e16 --- /dev/null +++ b/bbbdl/errors.py @@ -0,0 +1,6 @@ +class BBBDLError(Exception): + pass + + +class DownloadError(BBBDLError): + pass diff --git a/bbbdl/resources.py b/bbbdl/resources.py new file mode 100644 index 0000000..b147aa2 --- /dev/null +++ b/bbbdl/resources.py @@ -0,0 +1,69 @@ +from __future__ import annotations +from typing import * +import dataclasses +import requests +import bs4 +import ffmpeg +from .urlhandler import playback_to_data + + +@dataclasses.dataclass() +class Resource: + href: str + + def as_stream(self, **kwargs) -> ffmpeg.Stream: + return ffmpeg.input(self.href, **kwargs) + + +@dataclasses.dataclass() +class Shape: + resource: Resource + start: float + end: float + + @classmethod + def from_tag(cls, tag: bs4.Tag, *, base_url: str) -> Shape: + # No, `"in" not in tag` does not work + if not tag["in"]: + raise ValueError("Tag has no 'in' parameter") + if not tag["out"]: + raise ValueError("Tag has no 'out' parameter") + if not tag["xlink:href"]: + raise ValueError("Tag has no 'xlink:href' parameter") + + return cls( + resource=Resource(href=f"{base_url}/{tag['xlink:href']}"), + start=float(tag["in"]), + end=float(tag["out"]), + ) + + +@dataclasses.dataclass() +class Meeting: + deskshare: Resource + webcams: Resource + shapes: List[Shape] + + @classmethod + def from_base_url(cls, base_url: str, meeting_id: str) -> Meeting: + deskshare = Resource(href=f"{base_url}/presentation/{meeting_id}/deskshare/deskshare.webm") + webcams = Resource(href=f"{base_url}/presentation/{meeting_id}/video/webcams.mp4") + + shape_soup = bs4.BeautifulSoup(requests.get(f"{base_url}/presentation/{meeting_id}/shapes.svg").text, + "lxml") + shapes: List[Shape] = [] + for tag in shape_soup.find_all("image"): + try: + shapes.append(Shape.from_tag(tag, base_url=f"{base_url}/presentation/{meeting_id}")) + except ValueError: + continue + + return cls( + deskshare=deskshare, + webcams=webcams, + shapes=shapes + ) + + @classmethod + def from_url(cls, url: str) -> Meeting: + return cls.from_base_url(*playback_to_data(url)) diff --git a/bbbdl/urlhandler.py b/bbbdl/urlhandler.py new file mode 100644 index 0000000..6328919 --- /dev/null +++ b/bbbdl/urlhandler.py @@ -0,0 +1,12 @@ +from typing import * +import re + + +split_regex = re.compile(r"^(https?://.+)/playback/presentation/2\.0/playback\.html\?meetingId=([0-9a-f-]+)$") + + +def playback_to_data(url) -> Sequence[str]: + match = split_regex.match(url) + if not match: + raise ValueError("Could not split URL in base_url and meeting_id") + return match.groups() diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..c1fe7c2 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,209 @@ +[[package]] +name = "beautifulsoup4" +version = "4.9.3" +description = "Screen-scraping library" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[package.dependencies] +[package.dependencies.soupsieve] +version = ">1.2" +python = ">=3.0" + +[[package]] +name = "certifi" +version = "2020.6.20" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "chardet" +version = "3.0.4" +description = "Universal encoding detector for Python 2 and 3" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "click" +version = "7.1.2" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "ffmpeg-python" +version = "0.2.0" +description = "Python bindings for FFmpeg - with complex filtering support" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +dev = ["future (0.17.1)", "numpy (1.16.4)", "pytest-mock (1.10.4)", "pytest (4.6.1)", "Sphinx (2.1.0)", "tox (3.12.1)"] + +[package.dependencies] +future = "*" + +[[package]] +name = "future" +version = "0.18.2" +description = "Clean single-source support for Python 3 and 2" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "idna" +version = "2.10" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "lxml" +version = "4.6.1" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["beautifulsoup4"] +source = ["Cython (>=0.29.7)"] + +[[package]] +name = "requests" +version = "2.24.0" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] +socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"] + +[package.dependencies] +certifi = ">=2017.4.17" +chardet = ">=3.0.2,<4" +idna = ">=2.5,<3" +urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26" + +[[package]] +name = "soupsieve" +version = "2.0.1" +description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" +optional = false +python-versions = ">=3.5" +marker = "python_version >= \"3.0\"" + +[[package]] +name = "urllib3" +version = "1.25.11" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +brotli = ["brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"] + +[metadata] +lock-version = "1.0" +python-versions = "^3.8" +content-hash = "8658967e9571ffdc038fe220a009684044442d130b1da057b7a099aa4af53244" + +[metadata.files] +beautifulsoup4 = [ + {file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"}, + {file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"}, + {file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"}, +] +certifi = [ + {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"}, + {file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"}, +] +chardet = [ + {file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, + {file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"}, +] +click = [ + {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, + {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, +] +ffmpeg-python = [ + {file = "ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127"}, + {file = "ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5"}, +] +future = [ + {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, +] +idna = [ + {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, + {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, +] +lxml = [ + {file = "lxml-4.6.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:4b7572145054330c8e324a72d808c8c8fbe12be33368db28c39a255ad5f7fb51"}, + {file = "lxml-4.6.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:302160eb6e9764168e01d8c9ec6becddeb87776e81d3fcb0d97954dd51d48e0a"}, + {file = "lxml-4.6.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:d4ad7fd3269281cb471ad6c7bafca372e69789540d16e3755dd717e9e5c9d82f"}, + {file = "lxml-4.6.1-cp27-cp27m-win32.whl", hash = "sha256:189ad47203e846a7a4951c17694d845b6ade7917c47c64b29b86526eefc3adf5"}, + {file = "lxml-4.6.1-cp27-cp27m-win_amd64.whl", hash = "sha256:56eff8c6fb7bc4bcca395fdff494c52712b7a57486e4fbde34c31bb9da4c6cc4"}, + {file = "lxml-4.6.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:23c83112b4dada0b75789d73f949dbb4e8f29a0a3511647024a398ebd023347b"}, + {file = "lxml-4.6.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:0e89f5d422988c65e6936e4ec0fe54d6f73f3128c80eb7ecc3b87f595523607b"}, + {file = "lxml-4.6.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:2358809cc64394617f2719147a58ae26dac9e21bae772b45cfb80baa26bfca5d"}, + {file = "lxml-4.6.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:be1ebf9cc25ab5399501c9046a7dcdaa9e911802ed0e12b7d620cd4bbf0518b3"}, + {file = "lxml-4.6.1-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:4fff34721b628cce9eb4538cf9a73d02e0f3da4f35a515773cce6f5fe413b360"}, + {file = "lxml-4.6.1-cp35-cp35m-win32.whl", hash = "sha256:475325e037fdf068e0c2140b818518cf6bc4aa72435c407a798b2db9f8e90810"}, + {file = "lxml-4.6.1-cp35-cp35m-win_amd64.whl", hash = "sha256:f98b6f256be6cec8dd308a8563976ddaff0bdc18b730720f6f4bee927ffe926f"}, + {file = "lxml-4.6.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:be7c65e34d1b50ab7093b90427cbc488260e4b3a38ef2435d65b62e9fa3d798a"}, + {file = "lxml-4.6.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:d18331ea905a41ae71596502bd4c9a2998902328bbabd29e3d0f5f8569fabad1"}, + {file = "lxml-4.6.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:3d9b2b72eb0dbbdb0e276403873ecfae870599c83ba22cadff2db58541e72856"}, + {file = "lxml-4.6.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:d20d32cbb31d731def4b1502294ca2ee99f9249b63bc80e03e67e8f8e126dea8"}, + {file = "lxml-4.6.1-cp36-cp36m-win32.whl", hash = "sha256:d182eada8ea0de61a45a526aa0ae4bcd222f9673424e65315c35820291ff299c"}, + {file = "lxml-4.6.1-cp36-cp36m-win_amd64.whl", hash = "sha256:c0dac835c1a22621ffa5e5f999d57359c790c52bbd1c687fe514ae6924f65ef5"}, + {file = "lxml-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d84d741c6e35c9f3e7406cb7c4c2e08474c2a6441d59322a00dcae65aac6315d"}, + {file = "lxml-4.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:8862d1c2c020cb7a03b421a9a7b4fe046a208db30994fc8ff68c627a7915987f"}, + {file = "lxml-4.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:3a7a380bfecc551cfd67d6e8ad9faa91289173bdf12e9cfafbd2bdec0d7b1ec1"}, + {file = "lxml-4.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:2d6571c48328be4304aee031d2d5046cbc8aed5740c654575613c5a4f5a11311"}, + {file = "lxml-4.6.1-cp37-cp37m-win32.whl", hash = "sha256:803a80d72d1f693aa448566be46ffd70882d1ad8fc689a2e22afe63035eb998a"}, + {file = "lxml-4.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:24e811118aab6abe3ce23ff0d7d38932329c513f9cef849d3ee88b0f848f2aa9"}, + {file = "lxml-4.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2e311a10f3e85250910a615fe194839a04a0f6bc4e8e5bb5cac221344e3a7891"}, + {file = "lxml-4.6.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:a71400b90b3599eb7bf241f947932e18a066907bf84617d80817998cee81e4bf"}, + {file = "lxml-4.6.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:211b3bcf5da70c2d4b84d09232534ad1d78320762e2c59dedc73bf01cb1fc45b"}, + {file = "lxml-4.6.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e65c221b2115a91035b55a593b6eb94aa1206fa3ab374f47c6dc10d364583ff9"}, + {file = "lxml-4.6.1-cp38-cp38-win32.whl", hash = "sha256:d6f8c23f65a4bfe4300b85f1f40f6c32569822d08901db3b6454ab785d9117cc"}, + {file = "lxml-4.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:573b2f5496c7e9f4985de70b9bbb4719ffd293d5565513e04ac20e42e6e5583f"}, + {file = "lxml-4.6.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:1d87936cb5801c557f3e981c9c193861264c01209cb3ad0964a16310ca1b3301"}, + {file = "lxml-4.6.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:2d5896ddf5389560257bbe89317ca7bcb4e54a02b53a3e572e1ce4226512b51b"}, + {file = "lxml-4.6.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9b06690224258db5cd39a84e993882a6874676f5de582da57f3df3a82ead9174"}, + {file = "lxml-4.6.1-cp39-cp39-win32.whl", hash = "sha256:bb252f802f91f59767dcc559744e91efa9df532240a502befd874b54571417bd"}, + {file = "lxml-4.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ecaef52fd9b9535ae5f01a1dd2651f6608e4ec9dc136fc4dfe7ebe3c3ddb230"}, + {file = "lxml-4.6.1.tar.gz", hash = "sha256:c152b2e93b639d1f36ec5a8ca24cde4a8eefb2b6b83668fcd8e83a67badcb367"}, +] +requests = [ + {file = "requests-2.24.0-py2.py3-none-any.whl", hash = "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"}, + {file = "requests-2.24.0.tar.gz", hash = "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b"}, +] +soupsieve = [ + {file = "soupsieve-2.0.1-py3-none-any.whl", hash = "sha256:1634eea42ab371d3d346309b93df7870a88610f0725d47528be902a0d95ecc55"}, + {file = "soupsieve-2.0.1.tar.gz", hash = "sha256:a59dc181727e95d25f781f0eb4fd1825ff45590ec8ff49eadfd7f1a537cc0232"}, +] +urllib3 = [ + {file = "urllib3-1.25.11-py2.py3-none-any.whl", hash = "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e"}, + {file = "urllib3-1.25.11.tar.gz", hash = "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ce7ab34 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[tool.poetry] +name = "bbbdl" +version = "0.1.0" +description = "A downloader for BigBlueButton meetings" +authors = [ + "g.minoccari ", + "Stefano Pigozzi " +] +license = "AGPL-3.0-or-later" + +[tool.poetry.dependencies] +python = "^3.8" +click = "^7.1.2" +beautifulsoup4 = "^4.9.3" +requests = "^2.24.0" +ffmpeg-python = "^0.2.0" +lxml = "^4.6.1" + +[tool.poetry.scripts] +bbbdl = 'bbbdl.__main__:download' + +[build-system] +requires = ["poetry>=0.12"] +build-backend = "poetry.masonry.api"