import requests
from bs4 import BeautifulSoup
import csv
# Scrape the <url> entries of the numbered XML sitemaps (1.xml .. 20.xml) and
# export them to a CSV file.
#
# Each CSV row holds: a running index (restarting at 1 for every sitemap), the
# page location, the first non-empty image title found for that page, and the
# page's image locations padded with empty cells up to the header width.
headers = ["Index", "Loc", "Image Title", "Image Loc 1", "Image Loc 2", "Image Loc 3", "Image Loc 4", "Image Loc 5", "Image Loc 6"]
rows = []

# Number of "Image Loc N" columns: everything after Index, Loc and Image Title.
max_image_columns = len(headers) - 3

for i in range(1, 21):
    sitemap_url = f"https://www.site.com/{i}.xml"
    # A timeout keeps a single unresponsive server from hanging the whole run.
    response = requests.get(sitemap_url, timeout=30)
    # NOTE(review): the tag lookups below use prefixed names such as
    # "image:image" verbatim; this relies on the "lxml" parser exposing them
    # under those literal names, so the parser choice is kept as-is.
    soup = BeautifulSoup(response.text, "lxml")

    # The index restarts at 1 for every sitemap file.
    for counter, entry in enumerate(soup.find_all("url"), start=1):
        loc_tag = entry.find("loc")
        loc = loc_tag.text if loc_tag is not None else None

        image_locs = []
        image_title = None
        for image in entry.find_all("image:image"):
            # Keep the first non-empty title; images without a title tag are
            # tolerated instead of raising AttributeError.
            if not image_title:
                title_tag = image.find("image:title")
                if title_tag is not None:
                    image_title = title_tag.text
            # An image without a location contributes nothing to the row.
            image_loc_tag = image.find("image:loc")
            if image_loc_tag is not None:
                image_locs.append(image_loc_tag.text)

        # Pad short rows so they line up with the headers. Pages with more
        # images than header columns keep all their locations (the padding is
        # simply empty), so such rows are wider than the header row.
        padding = [None] * (max_image_columns - len(image_locs))
        rows.append([counter, loc, image_title] + image_locs + padding)

# newline="" lets the csv module control line endings; the explicit encoding
# keeps non-ASCII titles from depending on the platform default.
with open("/Users/jitendersingh/downloads/-data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(rows)
# 0 comments:
# Post a Comment