← 返回首页
created webscarping.py for html parsing by cherukuri12 · Pull Request #11 · realpython/python-scripts · GitHub
Skip to content

Navigation Menu

Toggle navigation
Sign in
Appearance settings
Search or jump to...

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Include my email address so I can be contacted

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Resetting focus
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension .py  (1) All 1 file type selected Viewed files
Conversations
Failed to load comments. Retry
Loading
Jump to
Jump to file
Failed to load files. Retry
Loading
Diff view
Unified
Split
Hide whitespace
Apply and reload
Show whitespace
Diff view
Unified
Split
Hide whitespace
Apply and reload
34 changes: 34 additions & 0 deletions scripts/webscraping.py
Show comments View file Edit file Delete file Open in desktop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import requests
from bs4 import BeautifulSoup

def cars_brand_links():
    """Fetch the carsprite car-prices index and process each matching link.

    Every ``<a>`` element on the index page has its ``href`` appended to
    ``https://www.carsprite.com/en/``. When the resulting URL contains
    ``car-prices/`` it is passed to ``get_single_item_data``.

    Returns:
        None. All output is produced by ``get_single_item_data``.

    Raises:
        requests.RequestException: if the index page cannot be fetched
            (including when the request times out).
    """
    url = 'https://www.carsprite.com/en/car-prices/'
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed on the machine.
    soup = BeautifulSoup(response.text, "html.parser")
    for link in soup.find_all("a"):
        href = link.get('href')
        if href is None:
            # Anchors without an href attribute yield None; concatenating it
            # below would raise TypeError and abort the whole crawl.
            continue
        candidate = "https://www.carsprite.com/en/" + href
        if "car-prices/" in candidate:
            get_single_item_data(candidate)
def get_single_item_data(brand_url):
    """Fetch ``brand_url`` and print a car-prices URL for each qualifying link.

    A link qualifies when its ``href`` contains none of ``/en/``, ``https``
    or ``/car-prices/``. Each qualifying ``href`` is appended to
    ``https://www.carsprite.com/en/car-prices/`` and printed on its own line.

    Args:
        brand_url: URL of the page to fetch and scan for links.

    Returns:
        None. Results are written to standard output.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the request times out).
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(brand_url, timeout=30)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed on the machine.
    soup = BeautifulSoup(response.text, "html.parser")
    for link in soup.find_all("a"):
        href = link.get('href')
        if href is None:
            # Anchors without an href attribute yield None; the substring
            # checks below would raise TypeError on it.
            continue
        # Skip hrefs that contain any of the excluded markers.
        if "/en/" in href or "https" in href or "/car-prices/" in href:
            continue
        print('https://www.carsprite.com/en/car-prices/' + href)


# Module-level call: the crawl starts whenever this file is executed or
# imported, because the call is not guarded by `if __name__ == "__main__":`.
cars_brand_links()
Toggle all file notes Toggle all file annotations

Footer

© 2026 GitHub, Inc.