{ "info": { "author": "Johannes Fischer", "author_email": "aulasparticularesdealemaosp@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities" ], "description": "\r\n### One line web scraping by combining pandas and BeautifulSoup4\r\n\r\n##### Check out the video\r\n\r\n
\r\n \r\n \r\n \r\n
\r\n\r\n##### Code from the video\r\n\r\n```python\r\npip install a-pandas-ex-bs4df \r\n```\r\n\r\n```python\r\nfrom a_pandas_ex_bs4df import pd_add_bs4_to_df\r\nimport pandas as pd\r\npd_add_bs4_to_df() \r\n\r\nfrom PrettyColorPrinter import add_printer #optional\r\nadd_printer(True) #optional\r\n\r\ndf=pd.Q_bs4_to_df(r'https://github.com/search?l=Python&q=python&type=Repositories')\r\ndf.loc[(~df.bb_href.isna()) & df.aa_attrs_values.str.contains('middle',regex=False, na=False)]\r\ndf.loc[(~df.bb_href.isna()) & df.aa_attrs_values.str.contains('middle',regex=False, na=False)].ff_fetchParents.apply(lambda x: x())\r\ndf.loc[(~df.bb_src.isna()) & (~df.bb_src.str.contains(r'\\.png$',regex=True,na=False))]\r\ndf.loc[(~df.bb_src.isna()) & (df.bb_src.str.contains(r'\\.png$',regex=True,na=False))]\r\n```\r\n\r\n```python\r\nParameters:\r\n htmlcode:Union[str,bytes]\r\n file path, url or html source code\r\n urls will be downloaded with requests\r\n dontuse:tuple\r\n bs4 attributes to exclude from the dataframe\r\n default = (\r\n \"element_classes\",\r\n \"builder\",\r\n \"is_xml\",\r\n \"known_xml\",\r\n \"_namespaces\",\r\n \"parse_only\",\r\n \"markup\",\r\n \"contains_replacement_characters\",\r\n \"original_encoding\",\r\n \"declared_html_encoding\",\r\n \"parser_class\",\r\n \"namespace\",\r\n \"prefix\",\r\n \"cdata_list_attributes\",\r\n \"preserve_whitespace_tag_stack\",\r\n \"open_tag_counter\",\r\n \"preserve_whitespace_tags\",\r\n \"interesting_string_types\",\r\n \"current_data\",\r\n \"string_container_stack\",\r\n \"_most_recent_element\",\r\n \"currentTag\",\r\n )\r\n parser: str\r\n Have a look at the bs4 documentation\r\n (default='lxml')\r\n tags_to_find:Union[bool,str]=True\r\n will be passed to soup.find_all()\r\n Have a look at the bs4 documentation\r\n (default=True) #everything\r\nReturns:\r\n df: pd.DataFrame\r\n```\r\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/hansalemaos/a_pandas_ex_bs4df", "keywords": "BeautifulSoup4,bs4,pandas,web scraping", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "a-pandas-ex-bs4df", "package_url": "https://pypi.org/project/a-pandas-ex-bs4df/", "platform": null, "project_url": "https://pypi.org/project/a-pandas-ex-bs4df/", "project_urls": { "Homepage": "https://github.com/hansalemaos/a_pandas_ex_bs4df" }, "release_url": "https://pypi.org/project/a-pandas-ex-bs4df/0.12/", "requires_dist": [ "beautifulsoup4", "lxml", "pandas", "regex", "requests", "useful-functions-easier-life" ], "requires_python": "", "summary": "One-line-web-scraping by combining pandas and BeautifulSoup4", "version": "0.12", "yanked": false, "yanked_reason": null }, "last_serial": 20364752, "releases": { "0.10": [ { "comment_text": "", "digests": { "blake2b_256": "0321d85dcef2301023e46cf66aeed325a0fce1492c89d90d84295561840ee67d", "md5": "eb457682b329a9b7d96ab8ce71a4e177", "sha256": "58383acd844ccdac85b7f22a2e865bc077e944bcfc02f615d23a563168ccdebf" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.10-py3-none-any.whl", "has_sig": false, "md5_digest": "eb457682b329a9b7d96ab8ce71a4e177", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7742, "upload_time": "2022-10-29T21:42:52", "upload_time_iso_8601": "2022-10-29T21:42:52.969587Z", "url": "https://files.pythonhosted.org/packages/03/21/d85dcef2301023e46cf66aeed325a0fce1492c89d90d84295561840ee67d/a_pandas_ex_bs4df-0.10-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "blake2b_256": "ce0472c94c4c717af32875e28964dce7b9ea04824eb56a11518f6e2f24f7ed6c", "md5": "c90939dab6c03bc332d2f8b019acafe0", "sha256": "2b22ace100590415338716a259c3adcbb939042ec08998b5abb975ecdf73a845" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.10.tar.gz", "has_sig": false, "md5_digest": "c90939dab6c03bc332d2f8b019acafe0", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5352, "upload_time": "2022-10-29T21:42:54", "upload_time_iso_8601": "2022-10-29T21:42:54.881414Z", "url": "https://files.pythonhosted.org/packages/ce/04/72c94c4c717af32875e28964dce7b9ea04824eb56a11518f6e2f24f7ed6c/a_pandas_ex_bs4df-0.10.tar.gz", "yanked": false, "yanked_reason": null } ], "0.11": [ { "comment_text": "", "digests": { "blake2b_256": "2a1ef27441fa07c5f6a4be76089e293e5e99411d366fa2d390c035454da76b76", "md5": "1810c993464b56b0d2f333f4ab968ef5", "sha256": "3fbaf2a18acf02bbefc1901a3cc2ea0f142d6d36af05aa486067072fde3aaef9" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.11-py3-none-any.whl", "has_sig": false, "md5_digest": "1810c993464b56b0d2f333f4ab968ef5", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 31787, "upload_time": "2023-10-12T17:14:11", "upload_time_iso_8601": "2023-10-12T17:14:11.820280Z", "url": "https://files.pythonhosted.org/packages/2a/1e/f27441fa07c5f6a4be76089e293e5e99411d366fa2d390c035454da76b76/a_pandas_ex_bs4df-0.11-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "blake2b_256": "07b5619371126d42b33dc81d0b9a5edc29e5949b2886d78cb9e7ec8b13d78d7b", "md5": "9907b3210958aa82375b4f6b17b6b7a5", "sha256": "0427e3da8dcdc39e5544e45816854fae23fc4811edc1f1de7dab590d4fd51ee3" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.11.tar.gz", "has_sig": false, "md5_digest": "9907b3210958aa82375b4f6b17b6b7a5", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 30689, "upload_time": "2023-10-12T17:14:14", "upload_time_iso_8601": "2023-10-12T17:14:14.033453Z", "url": "https://files.pythonhosted.org/packages/07/b5/619371126d42b33dc81d0b9a5edc29e5949b2886d78cb9e7ec8b13d78d7b/a_pandas_ex_bs4df-0.11.tar.gz", "yanked": false, "yanked_reason": null } ], "0.12": [ { "comment_text": "", "digests": { "blake2b_256": "560e71c035e9b8e675fb8ea4487276c3a53ae01e05d0094a13db05cd32c0bc79", "md5": "cf8396f90949c6977b55735e8c3c45d2", "sha256": "305eb2e4f0e4da5b1760caea05913ce85344cc52538b0ba44dbb3461822b7a80" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.12-py3-none-any.whl", "has_sig": false, "md5_digest": "cf8396f90949c6977b55735e8c3c45d2", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 12862, "upload_time": "2023-10-27T15:07:19", "upload_time_iso_8601": "2023-10-27T15:07:19.331223Z", "url": "https://files.pythonhosted.org/packages/56/0e/71c035e9b8e675fb8ea4487276c3a53ae01e05d0094a13db05cd32c0bc79/a_pandas_ex_bs4df-0.12-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "blake2b_256": "7ccb531cb3d2430149f436bee195c0b64d75923bdaf8309df4f8d7c7b67846a1", "md5": "a60084d3daa14362ffd86284c4842766", "sha256": "a8ad36f66097ab9ce2b87bf78834a83f98dbcae52028ef4273539989fc4dad98" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.12.tar.gz", "has_sig": false, "md5_digest": "a60084d3daa14362ffd86284c4842766", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11690, "upload_time": "2023-10-27T15:07:21", "upload_time_iso_8601": "2023-10-27T15:07:21.621621Z", "url": "https://files.pythonhosted.org/packages/7c/cb/531cb3d2430149f436bee195c0b64d75923bdaf8309df4f8d7c7b67846a1/a_pandas_ex_bs4df-0.12.tar.gz", "yanked": false, "yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "blake2b_256": "560e71c035e9b8e675fb8ea4487276c3a53ae01e05d0094a13db05cd32c0bc79", "md5": "cf8396f90949c6977b55735e8c3c45d2", "sha256": "305eb2e4f0e4da5b1760caea05913ce85344cc52538b0ba44dbb3461822b7a80" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.12-py3-none-any.whl", "has_sig": false, "md5_digest": "cf8396f90949c6977b55735e8c3c45d2", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 12862, "upload_time": "2023-10-27T15:07:19", "upload_time_iso_8601": "2023-10-27T15:07:19.331223Z", "url": "https://files.pythonhosted.org/packages/56/0e/71c035e9b8e675fb8ea4487276c3a53ae01e05d0094a13db05cd32c0bc79/a_pandas_ex_bs4df-0.12-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "blake2b_256": "7ccb531cb3d2430149f436bee195c0b64d75923bdaf8309df4f8d7c7b67846a1", "md5": "a60084d3daa14362ffd86284c4842766", "sha256": "a8ad36f66097ab9ce2b87bf78834a83f98dbcae52028ef4273539989fc4dad98" }, "downloads": -1, "filename": "a_pandas_ex_bs4df-0.12.tar.gz", "has_sig": false, "md5_digest": "a60084d3daa14362ffd86284c4842766", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11690, "upload_time": "2023-10-27T15:07:21", "upload_time_iso_8601": "2023-10-27T15:07:21.621621Z", "url": "https://files.pythonhosted.org/packages/7c/cb/531cb3d2430149f436bee195c0b64d75923bdaf8309df4f8d7c7b67846a1/a_pandas_ex_bs4df-0.12.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }