from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import pickle
import time
import re
import csv
import os
# --- WebDriver setup ---------------------------------------------------------
# Specify the path to the ChromeDriver executable
chrome_driver_path = '/Users/jitendersingh/Documents/chromedriver' # Change this to the actual path
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)
# Generous 120 s timeout: the first wait below doubles as the window for the
# operator to complete the Telegram login manually in the opened browser.
wait = WebDriverWait(driver, 120)
actions = ActionChains(driver)
# Regular expression pattern to match URLs (NOTE: unused here — the extraction
# function below defines its own copy of this pattern)
url_pattern = r'https://terab\S+'
# Open the login page (the fragment is a Telegram group/channel id)
driver.get('https://web.telegram.org/k/#-1784024652')
# Block until the chat UI renders, i.e. login has completed.
# NOTE(review): these absolute XPaths are extremely brittle — they break as
# soon as Telegram changes its markup; verify selectors before each run.
wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div/div[1]/div[1]/button/div')))
driver.get('https://web.telegram.org/k/#-1784024652')
# Click the target group entry in the chat list (done twice — presumably the
# first click sometimes lands before the list is interactive; TODO confirm).
group_element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div[1]/div/div/div[1]/div/span')))
group_element.click()
print("group clicked")
time.sleep(2)
group_element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div[1]/div/div/div[1]/div/span')))
group_element.click()
print("group clicked")
time.sleep(2)
# Open the first image message so the media viewer (caption pane) appears.
imgsection = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[3]/div/div/div[2]/div/div/div[3]/div[2]/div[2]/div/div/div[2]/div[1]/img')))
imgsection.click()
print("imgsection clicked")
time.sleep(2)
# Focus the media-viewer overlay so arrow-key navigation works in the loop below.
img = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')))
img.click()
print("img clicked")
time.sleep(2)
# Function to extract links from text
def extract_links_from_text(text):
    """Return every URL starting with ``https://terab`` found in *text*.

    Parameters
    ----------
    text : str
        Arbitrary text (e.g. an image caption) that may contain URLs.

    Returns
    -------
    list[str]
        Matches in order of appearance; empty list when none are found.
    """
    # \S+ extends each match up to the first whitespace character, so a URL
    # followed by punctuation-then-space keeps the trailing punctuation.
    # `re` is already imported at module level; the redundant local import
    # and duplicate pattern definition were removed.
    return re.findall(r'https://terab\S+', text)
# Initialize WebDriver and navigate to the page
# ...
csv_filename = '/Users/jitendersingh/Documents/onlylinks.csv' # Replace with the actual CSV file path
max_iterations = 500 # Set the maximum number of iterations

# Walk through the media viewer with the right-arrow key, harvesting links
# from each item's caption.  The original flattened loop body is restored
# here with proper indentation.  Improvements: the output file is opened
# once instead of once per iteration, and the driver is always shut down
# via try/finally even if a wait times out mid-loop.
try:
    with open(csv_filename, 'a') as csv_file:
        for iteration in range(max_iterations):
            # Advance to the next media item.
            actions.send_keys(Keys.ARROW_RIGHT).perform()
            # Brief pause to let the viewer swap in the new item.
            time.sleep(1)
            wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')))
            # The overlay div holds the caption text for the current item.
            element = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[5]/div')
            text = element.text
            # Extract and append any matching links to the CSV file.
            links = extract_links_from_text(text)
            for link in links:
                csv_file.write(link + '\n')
            time.sleep(1)
finally:
    # Always release the browser, even on timeout/StaleElement errors.
    driver.quit()
# (Blog-page footer residue — "0 comments: Post a Comment" — not part of the script.)