from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import pickle
import time
import re
import csv
import os
# --- WebDriver setup ---------------------------------------------------------
# Specify the path to the ChromeDriver executable
chrome_driver_path = '/Users/jitendersingh/Documents/chromedriver' # Change this to the actual path
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)
# Generous 120 s timeout: the first wait below doubles as the window for the
# operator to complete the Telegram login manually in the opened browser.
wait = WebDriverWait(driver, 120)
actions = ActionChains(driver)
# Regular expression pattern to match URLs (NOTE: unused here — the extraction
# function below defines its own copy of this pattern)
url_pattern = r'https://terab\S+'
# Open the login page (the fragment is a Telegram group/channel id)
driver.get('https://web.telegram.org/k/#-1784024652')
# Block until the chat UI renders, i.e. login has completed.
# NOTE(review): these absolute XPaths are extremely brittle — they break as
# soon as Telegram changes its markup; verify selectors before each run.
wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div/div[1]/div[1]/button/div')))
driver.get('https://web.telegram.org/k/#-1784024652')
# Click the target group entry in the chat list (done twice — presumably the
# first click sometimes lands before the list is interactive; TODO confirm).
group_element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div[1]/div/div/div[1]/div/span')))
group_element.click()
print("group clicked")
time.sleep(2)
group_element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div[1]/div/div/div[1]/div/span')))
group_element.click()
print("group clicked")
time.sleep(2)
# Open the first image message so the media viewer (caption pane) appears.
imgsection = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[3]/div/div/div[2]/div/div/div[3]/div[2]/div[2]/div/div/div[2]/div[1]/img')))
imgsection.click()
print("imgsection clicked")
time.sleep(2)
# Focus the media-viewer overlay so arrow-key navigation works in the loop below.
img = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')))
img.click()
print("img clicked")
time.sleep(2)
# Function to extract links from text
def extract_links_from_text(text):
    """Return every URL starting with ``https://terab`` found in *text*.

    Parameters
    ----------
    text : str
        Arbitrary text (e.g. an image caption) that may contain URLs.

    Returns
    -------
    list[str]
        Matches in order of appearance; empty list when none are found.
    """
    # \S+ extends each match up to the first whitespace character, so a URL
    # followed by punctuation-then-space keeps the trailing punctuation.
    # `re` is already imported at module level; the redundant local import
    # and duplicate pattern definition were removed.
    return re.findall(r'https://terab\S+', text)
# Initialize WebDriver and navigate to the page
# ...
csv_filename = '/Users/jitendersingh/Documents/onlylinks.csv' # Replace with the actual CSV file path
max_iterations = 500 # Set the maximum number of iterations

# Walk through the media viewer with the right-arrow key, harvesting links
# from each item's caption.  The original flattened loop body is restored
# here with proper indentation.  Improvements: the output file is opened
# once instead of once per iteration, and the driver is always shut down
# via try/finally even if a wait times out mid-loop.
try:
    with open(csv_filename, 'a') as csv_file:
        for iteration in range(max_iterations):
            # Advance to the next media item.
            actions.send_keys(Keys.ARROW_RIGHT).perform()
            # Brief pause to let the viewer swap in the new item.
            time.sleep(1)
            wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')))
            # The overlay div holds the caption text for the current item.
            element = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')
            text = element.text
            # Extract and append any matching links to the CSV file.
            links = extract_links_from_text(text)
            for link in links:
                csv_file.write(link + '\n')
            time.sleep(1)
finally:
    # Always release the browser, even on timeout/StaleElement errors.
    driver.quit()
# (Blog-page footer residue — "0 comments: Post a Comment" — not part of the script.)