import whisper
import os
import subprocess
from bs4 import BeautifulSoup
# # === CONFIG ===
# video_file = "C:/users/Bot/Documents/test_j.mp4"
# output_folder = "C:/users/Bot/Documents/output_sections"
# os.makedirs(output_folder, exist_ok=True)
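# SECTION_GAP = 2.0 # minimum gap (presumably seconds) between consecutive segments that starts a new section; this is the only split criterion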
# # === LOAD MODEL ===
# model = whisper.load_model("large")
# # === TRANSCRIBE ===
# print("Transcribing video...")
# result = model.transcribe(video_file, word_timestamps=True, language="hi")
# segments = result["segments"]
# # === GROUP SEGMENTS INTO SECTIONS ===
# sections = []
# current_section = [segments[0]]
# for i in range(1, len(segments)):
# gap = segments[i]["start"] - segments[i-1]["end"]
# if gap >= SECTION_GAP:
# sections.append(current_section)
# current_section = []
# current_section.append(segments[i])
# if current_section:
# sections.append(current_section)
# print(f"Sections: {len(sections)}")
# # === PROCESS SECTIONS ===
# for s_idx, section in enumerate(sections, start=1):
# sec_start = section[0]["start"]
# sec_end = section[-1]["end"]
# # Calculate exact duration
# duration = sec_end - sec_start
# # Failsafe: If a section is insanely short, give it a tiny padding so FFmpeg doesn't break
# if duration < 0.5:
# duration = 0.5
# # ---- CUT VIDEO (FIXED: Frame-accurate cut instead of -c copy) ----
# out_video = os.path.join(output_folder, f"section_{s_idx}.mp4")
# subprocess.run([
# "ffmpeg", "-y",
# "-hwaccel", "cuda",
# "-ss", str(sec_start),
# "-t", str(duration),
# "-i", video_file,
# "-c:v", "h264_nvenc",
# "-preset", "fast",
# "-b:v", "5M",
# "-c:a", "aac",
# "-b:a", "192k",
# out_video
# ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# # === SAVE SEGMENTS AS LINES ===
# for l_idx, seg in enumerate(section, start=1):
# base = f"section_{s_idx}_line_{l_idx}"
# # --- Make times relative to this specific section ---
# relative_start = seg["start"] - sec_start
# relative_end = seg["end"] - sec_start
# # Clamp the relative times (start to at least 0.0, end to at least 0.1) to guard against tiny negative values from floating-point math
# relative_start = max(0.0, relative_start)
# relative_end = max(0.1, relative_end)
# # text
# with open(os.path.join(output_folder, base + ".txt"), "w", encoding="utf-8") as f:
# f.write(seg["text"].strip())
# # start (rounded to 3 decimal places for cleanliness)
# with open(os.path.join(output_folder, base + "_start.txt"), "w") as f:
# f.write(str(round(relative_start, 3)))
# # end
# with open(os.path.join(output_folder, base + "_end.txt"), "w") as f:
# f.write(str(round(relative_end, 3)))
# print(f"Section {s_idx} → {len(section)} lines | Duration: {round(duration, 2)}s")
# print("✅ Done (Whisper-based lines, frame-accurate splits generated)")
# # === FOLDER WHERE YOUR FILES ARE ===
# input_folder = output_folder
# # === COLLECT DATA ===
# sections_data = {}
# for file in os.listdir(input_folder):
# if file.endswith(".txt") and "_line_" in file and "_start" not in file and "_end" not in file:
# parts = file.replace(".txt", "").split("_")
# section_num = int(parts[1])
# line_num = int(parts[3])
# with open(os.path.join(input_folder, file), "r", encoding="utf-8") as f:
# text = f.read().strip()
# if section_num not in sections_data:
# sections_data[section_num] = []
# sections_data[section_num].append((line_num, text, file))
# # sort each section's entries by line number
# for sec in sections_data:
# sections_data[sec].sort(key=lambda x: x[0])
# # === BUILD STRUCTURED TEXT ===
# structured_text = ""
# for sec in sorted(sections_data.keys()):
# structured_text += f"Section {sec}:\n"
# for line_num, text, filename in sections_data[sec]:
# structured_text += f"{filename}: {text}\n"
# structured_text += "___\n\n"
# # === FINAL GEMINI PROMPT ===
# final_prompt = f"""
# You are an expert AI prompt engineer and video content visual designer.
# I will provide you structured transcript data divided into sections and lines.
# YOUR TASK:
# 1. First, carefully read and understand the FULL context of all sections together before generating anything.
# 2. For EACH section:
# - You MUST generate a strong, engaging **Section Heading (Lower Third Style)**.
# - This is COMPULSORY. Do NOT skip.
# - The heading should feel like a video subtitle / hook (short, impactful, viewer-friendly).
# 3. For EACH line:
# - Generate a **highly detailed cinematic image generation prompt**.
# - The prompt MUST include:
# - scene description
# - subject details
# - lighting
# - camera angle
# - mood / emotion
# - realism level (photorealistic, cinematic, ultra-detailed, etc.)
# - background environment details
# IMPORTANT PRODUCT RULE:
# - I will provide 1 product image separately in my pipeline.
# - If relevant, intelligently include the product in the scene.
# - The product MUST:
# - look realistic
# - be naturally placed in the environment
# - NOT be altered in any way
# - NOT change packaging, label, branding, or colors
# OUTPUT FORMAT (VERY STRICT — MUST FOLLOW):
# - Output EVERYTHING in ONE SINGLE HTML CODE BLOCK **in code format**.
# - Do NOT output multiple HTML blocks
# - Do NOT add any explanation or text outside HTML.
# - I only need 15 images maximum. don't make images prompt for outro portion, like subscribe etc...
# - instead of added No image prompt needed. don't add that line in html (if there is no need image for any line).
# Structure EXACTLY like this:
# <div class="section_1">
# <div class="section_heading">Lower third heading here</div>
# <div class="line section_1_line_1">
# <div class="text">Original text here</div>
# <div class="image_prompt">Detailed cinematic prompt here</div>
# </div>
# <div class="line section_1_line_2">
# <div class="text">Original text here</div>
# <div class="image_prompt">Detailed cinematic prompt here</div>
# </div>
# </div>
# <div class="section_2">
# <div class="section_heading">Lower third heading here</div>
# <div class="line section_2_line_1">
# <div class="text">Original text here</div>
# <div class="image_prompt">Detailed cinematic prompt here</div>
# </div>
# </div>
# STRICT RULES:
# - Section heading is MANDATORY for every section
# - DO NOT skip any section or line
# - Keep div class names EXACT (used in automation)
# - DO NOT rename classes
# - DO NOT shorten prompts
# - Each image prompt must be detailed (minimum 2–4 lines)
# - Maintain story continuity across sections
# - Keep visuals relevant to the text
# FINAL OUTPUT:
# - ONLY ONE HTML BLOCK
# - NO markdown
# - NO explanation
# - NO extra text
# HERE IS THE DATA:
# {structured_text}
# """
# # === SAVE PROMPT ===
# output_prompt_file = os.path.join(input_folder, "gemini_prompt.txt")
# with open(output_prompt_file, "w", encoding="utf-8") as f:
# f.write(final_prompt)
# print("✅ Gemini prompt generated:", output_prompt_file)
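##############################--------------IMAGE GENERATION---------------------------------------------------------------------
# NOTE(review): the commented-out code below uses shutil, time, Image, BytesIO, win32clipboard,
# WebDriverWait, EC, By, Keys and ActionChains, none of which are imported at the top of this file
# (presumably from PIL, io, pywin32 and Selenium; confirm). Add those imports before re-enabling it.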
# def clear_download_folder(folder_path):
# if not os.path.exists(folder_path):
# os.makedirs(folder_path)
# return
# for filename in os.listdir(folder_path):
# file_path = os.path.join(folder_path, filename)
# try:
# if os.path.isfile(file_path) or os.path.islink(file_path):
# os.unlink(file_path)
# elif os.path.isdir(file_path):
# shutil.rmtree(file_path)
# except Exception as e:
# print(f"Failed to delete {file_path}. Reason: {e}")
# def copy_image_to_clipboard(image_path):
# image = Image.open(image_path)
# output = BytesIO()
# image.convert("RGB").save(output, "BMP")
# data = output.getvalue()[14:]
# output.close()
# win32clipboard.OpenClipboard()
# win32clipboard.EmptyClipboard()
# win32clipboard.SetClipboardData(win32clipboard.CF_DIB, data)
# win32clipboard.CloseClipboard()
# # --- Phase 1: Setup (Runs Once) ---
# def setup_workspace(driver, image_path_to_upload):
# """
# Runs once before the loop. Navigates, clicks I agree, pastes the base image,
# and captures the base image's unique ID so it doesn't get lost later.
# """
# print("visiting url")
# driver.get("https://labs.google/fx/tools/flow")
# try:
# new_project_btn = WebDriverWait(driver, 20).until(
# EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'New project')]"))
# )
# new_project_btn.click()
# print("Successfully clicked 'New project' button.")
# time.sleep(2)
# except Exception as e:
# print(f"Error: Could not find or click the 'New project' button. {e}")
# try:
# agree_button = WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'I agree')]"))
# )
# driver.execute_script("arguments[0].click();", agree_button)
# print(" > 'I agree' popup found and clicked via JS.")
# time.sleep(1)
# except Exception:
# print(" > 'I agree' popup not detected, continuing...")
# try:
# textbox_element = WebDriverWait(driver, 30).until(
# EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[role="textbox"][contenteditable="true"]'))
# )
# print("Editor workspace loaded. Textbox is ready for input.")
# except Exception as e:
# print(f"Error waiting for editor workspace: {e}")
# base_image_id = None
# if os.path.exists(image_path_to_upload):
# print(f"Uploading base image: {image_path_to_upload}")
# copy_image_to_clipboard(image_path_to_upload)
# try:
# textbox_element = driver.find_element(By.CSS_SELECTOR, 'div[role="textbox"][contenteditable="true"]')
# textbox_element.click()
# time.sleep(0.5)
# textbox_element.send_keys(Keys.CONTROL, 'v')
# # Wait for upload confirmation
# WebDriverWait(driver, 120).until(
# EC.presence_of_element_located((By.XPATH, "//button[contains(., 'Clear prompt')]"))
# )
# print(" > Success: Base Image uploaded (Clear button appeared)")
# time.sleep(2) # Wait for image to render in the workspace
# # Capture just the unique ID from the src to avoid exact-match errors later
# media_images = driver.find_elements(By.XPATH, "//img[contains(@src, 'media.getMediaUrlRedirect')]")
# if media_images:
# src = media_images[-1].get_attribute("src")
# if "name=" in src:
# base_image_id = src.split("name=")[1].split("&")[0]
# else:
# base_image_id = src[-30:] # fallback
# print(f" > Base image ID captured successfully: {base_image_id}")
# except Exception as e:
# print(f"Could not interact with Selenium during setup. Error: {e}")
# else:
# print(f"Image {image_path_to_upload} not found.")
# return base_image_id
# # --- Phase 2: Generation and Cleanup (Runs in Loop) ---
# def process_single_prompt(
# driver,
# base_image_id,
# prompt,
# download_path,
# move_image_directory,
# final_filename,
# image_path_to_upload
# ):
# """
# Types the prompt, waits for generation, downloads, deletes the generated image,
# and adds the base image back to the prompt box for the next loop.
# """
# try:
# # 1. Capture existing image URLs BEFORE generating
# existing_imgs = driver.find_elements(By.TAG_NAME, "img")
# old_srcs = set([img.get_attribute("src") for img in existing_imgs])
# # 2. Send Text and Press Enter
# textbox_element = driver.find_element(By.CSS_SELECTOR, 'div[role="textbox"][contenteditable="true"]')
# textbox_element.click()
# final_message = f"{prompt}\n\n"
# lines = final_message.split('\n')
# for index, line in enumerate(lines):
# if line:
# textbox_element.send_keys(line)
# if index < len(lines) - 1:
# textbox_element.send_keys(Keys.SHIFT, Keys.ENTER)
# time.sleep(1)
# textbox_element.send_keys(Keys.ENTER)
# print(f" > Waiting for a NEW unique image to appear...")
# # 3. Wait for the NEW image
# new_image = None
# def find_new_image_element(d):
# current_images = d.find_elements(By.TAG_NAME, "img")
# for img in current_images:
# if img.get_attribute("src") not in old_srcs:
# return img
# return False
# try:
# new_image = WebDriverWait(driver, 90).until(find_new_image_element)
# print(f" > Success: New unique image detected.")
# time.sleep(5)
# except:
# print(f" > Timeout: No new image appeared. Checking for Retry button...")
# new_image = None
# try:
# # Retry Logic
# retry_button = WebDriverWait(driver, 5).until(
# EC.element_to_be_clickable((By.XPATH, "//button[descendant::span[contains(text(), 'Retry')]]"))
# )
# print(" > Retry button found. Clicking...")
# driver.execute_script("arguments[0].click();", retry_button)
# time.sleep(5)
# print(" > Waiting again for image generation (Attempt 2)...")
# new_image = WebDriverWait(driver, 90).until(find_new_image_element)
# print(f" > Success: New unique image detected after retry.")
# time.sleep(5)
# except Exception as e:
# print(f" > Final Failure: Retry button not found or generation failed again. {e}")
# print(" > Executing Fallback: Copying base image to act as downloaded file.")
# # Copy fallback
# clear_download_folder(download_path)
# fallback_file_path = os.path.join(download_path, "fallback_image.jpg")
# shutil.copy(image_path_to_upload, fallback_file_path)
# new_image = "FALLBACK"
# # Click Delete_Forever button on the failed image UI
# try:
# delete_forever_btn = WebDriverWait(driver, 5).until(
# EC.element_to_be_clickable((By.XPATH, "//button[descendant::i[text()='delete_forever']]"))
# )
# driver.execute_script("arguments[0].click();", delete_forever_btn)
# print(" > Clicked 'delete_forever' on failed image element.")
# time.sleep(1)
# except Exception as del_err:
# print(" > Could not find 'delete_forever' button.", del_err)
# # --- G. Right Click and Download ---
# if new_image:
# if new_image != "FALLBACK":
# try:
# clear_download_folder(download_path)
# print(" > Download folder cleared. Attempting to download...")
# driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", new_image)
# time.sleep(1)
# actions = ActionChains(driver)
# actions.move_to_element(new_image).pause(1).context_click().perform()
# download_option = WebDriverWait(driver, 10).until(
# EC.visibility_of_element_located((By.XPATH, "//div[@role='menuitem']//div[contains(text(), 'Download')]"))
# )
# actions.move_to_element(download_option).perform()
# time.sleep(0.5)
# k1_button = WebDriverWait(driver, 10).until(
# EC.visibility_of_element_located((By.XPATH, "//button[@role='menuitem']//span[contains(text(), '1K')]"))
# )
# k1_button.click()
# print(f" > Success: Download initiated")
# time.sleep(3)
# except Exception as e:
# print(f"Error during download sequence: {e}")
# else:
# print(" > Bypassing browser download (Using Fallback Image).")
# # --- H. Wait for Download to Finish ---
# downloaded_file = None
# start_wait = time.time()
# print(f" > Waiting for file to appear in {download_path}...")
# while (time.time() - start_wait) < 60:
# files = os.listdir(download_path)
# valid_files = [f for f in files if not f.endswith('.crdownload') and not f.endswith('.tmp')]
# if valid_files:
# downloaded_file = os.path.join(download_path, valid_files[0])
# print(f" > File detected: {valid_files[0]}")
# break
# time.sleep(1)
# if downloaded_file:
# # --- J. Move ---
# if not os.path.exists(move_image_directory):
# os.makedirs(move_image_directory)
# destination_path = os.path.join(move_image_directory, final_filename)
# time.sleep(1.5)
# shutil.move(downloaded_file, destination_path)
# print(f" > Success: Moved to {destination_path}")
# else:
# print(f"Error: Download timed out")
# # --- K. Clean up Generated Image ---
# if new_image != "FALLBACK":
# print(" > Deleting generated image from workspace...")
# try:
# actions = ActionChains(driver)
# actions.move_to_element(new_image).pause(1).context_click().perform()
# delete_option = WebDriverWait(driver, 5).until(
# EC.element_to_be_clickable((By.XPATH, "//*[@role='menuitem' and descendant-or-self::*[contains(text(), 'Delete')]]"))
# )
# delete_option.click()
# time.sleep(1)
# except Exception as e:
# print(f" > Failed to right-click delete the generated image: {e}")
# # --- L. Reset: Add Base Image back to Prompt ---
# if base_image_id:
# print(" > Adding base image back to prompt for next run...")
# try:
# # Wait a moment for DOM to settle after deletion
# time.sleep(1)
# # Find using 'contains' with the UUID instead of an exact match
# base_img_element = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, f"//img[contains(@src, '{base_image_id}')]"))
# )
# # Scroll into view
# driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", base_img_element)
# time.sleep(1)
# actions = ActionChains(driver)
# actions.move_to_element(base_img_element).pause(1).context_click().perform()
# add_to_prompt = WebDriverWait(driver, 5).until(
# EC.element_to_be_clickable((By.XPATH, "//*[@role='menuitem' and descendant-or-self::*[contains(text(), 'Add to Prompt')]]"))
# )
# add_to_prompt.click()
# # Wait for image to populate in text box (Clear prompt button appears)
# WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "//button[contains(., 'Clear prompt')]"))
# )
# print(" > Success: Base image re-added to prompt.")
# time.sleep(1)
# except Exception as e:
# print(f" > Could not add base image via ID. Attempting fallback... {e}")
# try:
# # Fallback: Find the first image that has 'media' in the URL but isn't a Generated Image
# fallback_img = WebDriverWait(driver, 5).until(
# EC.presence_of_element_located((By.XPATH, "//img[contains(@src, 'media') and not(@alt='Generated image')]"))
# )
# driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", fallback_img)
# time.sleep(1)
# actions = ActionChains(driver)
# actions.move_to_element(fallback_img).pause(1).context_click().perform()
# add_to_prompt = WebDriverWait(driver, 5).until(
# EC.element_to_be_clickable((By.XPATH, "//*[@role='menuitem' and descendant-or-self::*[contains(text(), 'Add to Prompt')]]"))
# )
# add_to_prompt.click()
# print(" > Fallback successful! Added image to prompt.")
# except Exception as fallback_e:
# print(f" > Fallback also failed: {fallback_e}")
# except Exception as e:
# print(f"Error in generation block: {e}")
# # ==========================================
# # MAIN EXECUTION SCRIPT
# # ==========================================
# # Define your paths
# my_image_path = "C:/users/Bot/Documents/base_image.png"
# my_target_folder = "C:/users/Bot/Documents/output_sections/"
# my_download_path = "G:/clipchamp_beu/"
# html_file_path = r"C:\Users\Bot\Documents\html_data.txt"
# # 1. SETUP WORKSPACE ONCE (Pass your bot3 driver here)
# print("=== Starting Workspace Setup ===")
# # Make sure bot3 is defined in your main script before this line
# captured_base_id = setup_workspace(bot3, my_image_path)
# if not captured_base_id:
# print("CRITICAL ERROR: Could not capture base image. Exiting script.")
# else:
# # 2. PARSE HTML
# with open(html_file_path, 'r', encoding='utf-8') as file:
# html_content = file.read()
# soup = BeautifulSoup(html_content, 'html.parser')
# line_divs = soup.find_all('div', class_='line')
# print(f"Found {len(line_divs)} prompts to process.")
# # 3. LOOP THROUGH PROMPTS
# for div in line_divs:
# class_list = div.get('class', [])
# line_name = next((c for c in class_list if c != 'line'), "unknown_line")
# prompt_div = div.find('div', class_='image_prompt')
# if prompt_div:
# prompt_text = prompt_div.get_text(strip=True)
# final_jpg_name = f"{line_name}.jpg"
# print("\n" + "=" * 50)
# print(f"Processing: {final_jpg_name}")
# print(f"Prompt: {prompt_text[:60]}...")
# # Process the image
# process_single_prompt(
# driver=bot3,
# base_image_id=captured_base_id,
# prompt=prompt_text,
# download_path=my_download_path,
# move_image_directory=my_target_folder,
# final_filename=final_jpg_name,
# image_path_to_upload=my_image_path
# )
# # Short pause between iterations to let the browser stabilize
# time.sleep(2)
# print("\nAll tasks completed successfully!")
# 0 comments:
# Post a Comment