import difflib
import glob
import os
import re
import shutil
import time
from difflib import get_close_matches

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
#--------------IMPORTANT----------->>>>>>>>>> the audio file name should be photo2.wav
# Working folders shared by the stages of this script.
# Folder the segment files are moved out of while they are being concatenated.
original_folder = "/Users/jitendersingh/Documents/audiotest/photo2"
# Receives the segments kept by concatenate_audio_segments; also the input of the final filtering pass.
included_folder1 = "/Users/jitendersingh/Documents/audiotest/included1"
# Receives the segments rejected by concatenate_audio_segments.
skipped_folder1 = "/Users/jitendersingh/Documents/audiotest/excluded1"
# Receives the segments that survive the final middle-word filtering pass.
included_folder2 = "/Users/jitendersingh/Documents/audiotest/included2"
def _sanitize_transcription(text, max_length=150):
    """Return *text* reduced to something safe to embed in a file name."""
    # Path separators, control characters and the characters rejected by
    # common file systems would otherwise break or redirect the export path.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', " ", text or "")
    # Collapse the gaps left by the substitution and trim the ends.
    cleaned = " ".join(cleaned.split())
    return cleaned[:max_length].rstrip()


def split_audio_segments(
    audio_file,
    *,
    silence_threshold=-50,
    silence_duration=500,
    buffer_duration=500,
):
    """Split an audio file on silence and save every transcribed segment.

    The non-silent ranges of ``audio_file`` are padded by ``buffer_duration``
    on both sides, transcribed with ``recognize_google`` and written to a
    folder named after the audio file (its path without the extension) as
    ``"<n>. <transcription>.wav"``, where ``n`` counts recognized segments.

    Args:
        audio_file: Path of the audio file to split.
        silence_threshold: Level in dB below which audio counts as silence.
        silence_duration: Minimum silence length in milliseconds that
            separates two segments.
        buffer_duration: Padding in milliseconds kept around each segment.

    Returns:
        The number of segments that were transcribed and saved.

    Raises:
        Any exception other than ``sr.UnknownValueError`` raised while
        transcribing (for example a failed request) propagates to the
        caller; the temporary file is removed first.
    """
    audio = AudioSegment.from_file(audio_file)
    non_silent_ranges = detect_nonsilent(
        audio,
        min_silence_len=silence_duration,
        silence_thresh=silence_threshold,
    )

    # Segments are written next to the source file, in a folder named after it.
    output_directory = os.path.splitext(audio_file)[0]
    os.makedirs(output_directory, exist_ok=True)
    temp_file = os.path.join(output_directory, "temp.wav")

    recognizer = sr.Recognizer()
    recognized_count = 0
    for start, end in non_silent_ranges:
        # Pad the range, clamped to the bounds of the recording.
        segment_start = max(0, start - buffer_duration)
        segment_end = min(end + buffer_duration, len(audio))
        segment = audio[segment_start:segment_end]

        # The recognizer reads from disk, so stage the segment in a temp file.
        # export() hands back the open output file; close it so the file can
        # be re-read and deleted on every platform.
        segment.export(temp_file, format="wav").close()
        try:
            with sr.AudioFile(temp_file) as source:
                audio_data = recognizer.record(source)
            transcription = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            print("Skipping segment - Unrecognized transcription")
            continue
        finally:
            # Always clean up, including when recognition fails unexpectedly.
            os.remove(temp_file)

        label = _sanitize_transcription(transcription)
        if not label:
            print("Skipping segment - Empty transcription")
            continue

        recognized_count += 1
        segment_filename = os.path.join(
            output_directory, f"{recognized_count}. {label}.wav"
        )
        segment.export(segment_filename, format="wav").close()

    print(f"{recognized_count} segments saved successfully.")
    return recognized_count
# Provide the path to your audio file
audio_file_path = "/Users/jitendersingh/Documents/audiotest/photo2.wav"
# Stage 1: split the recording into transcribed segment files.
split_audio_segments(audio_file_path)
# Pause for five seconds before continuing with the next statements.
time.sleep(5)
## Segments created; now exclude duplicate segments and merge the audio. ---------------------
_SEGMENT_NUMBER_RE = re.compile(r"^(\d+)")
_SEGMENT_PHRASE_RE = re.compile(r"\d+\. (.+)\.wav")


def _segment_sort_key(file_name):
    """Return the leading number of *file_name*, or infinity when it has none."""
    found = _SEGMENT_NUMBER_RE.search(file_name)
    return int(found.group()) if found else float("inf")


def _segment_phrase(file_name):
    """Return the transcription part of ``"<n>. <phrase>.wav"``, or ``""``."""
    found = _SEGMENT_PHRASE_RE.match(file_name)
    return found.group(1) if found else ""


def concatenate_audio_segments(
    segment_directory,
    output_file,
    *,
    included_dir=None,
    skipped_dir=None,
):
    """Merge the segment files of a folder, dropping repeated openings.

    Files are processed in the order of their leading number. A segment is
    kept when its phrase is non-empty and its first three characters differ
    from those of the following segment's phrase, so only the last segment of
    a run with the same opening is kept. Kept files are appended to the merged
    audio and moved to ``included_dir``; all other files are moved to
    ``skipped_dir``.

    Args:
        segment_directory: Folder holding the ``"<n>. <phrase>.wav"`` files.
        output_file: Path the merged audio is exported to (WAV).
        included_dir: Destination of kept segments. Defaults to the
            module-level ``included_folder1``.
        skipped_dir: Destination of rejected segments. Defaults to the
            module-level ``skipped_folder1``.
    """
    if included_dir is None:
        included_dir = included_folder1
    if skipped_dir is None:
        skipped_dir = skipped_folder1
    # shutil.move fails when the destination folder is missing.
    os.makedirs(included_dir, exist_ok=True)
    os.makedirs(skipped_dir, exist_ok=True)

    output_path = os.path.abspath(output_file)
    candidates = []
    for name in os.listdir(segment_directory):
        path = os.path.join(segment_directory, name)
        if name.startswith('.') or name == 'temp.wav' or os.path.isdir(path):
            continue
        # A merged file left by an earlier run is not a segment.
        if os.path.abspath(path) == output_path:
            continue
        candidates.append(name)

    segment_files = sorted(candidates, key=_segment_sort_key)
    phrases = [_segment_phrase(name) for name in segment_files]

    complete_audio = AudioSegment.empty()
    for index, segment_file in enumerate(segment_files):
        # Move from the folder that was actually scanned, not a global path.
        segment_path = os.path.join(segment_directory, segment_file)
        current_phrase = phrases[index]
        # The final segment has no successor; comparing it with "" keeps it
        # instead of matching it against a stale phrase.
        has_next = index + 1 < len(phrases)
        next_phrase = phrases[index + 1] if has_next else ""

        if current_phrase and current_phrase[:3] != next_phrase[:3]:
            complete_audio += AudioSegment.from_file(segment_path, format="wav")
            print(f"Included segment: {segment_file}")
            shutil.move(segment_path, os.path.join(included_dir, segment_file))
        else:
            print(f"Skipped segment: {segment_file}")
            shutil.move(segment_path, os.path.join(skipped_dir, segment_file))

    # Export the complete audio to the output file
    if len(complete_audio) > 0:
        # export() hands back the open output file; close it explicitly.
        complete_audio.export(output_file, format="wav").close()
        print(f"Complete audio saved as {output_file}.")
    else:
        print("No segments found for concatenation.")
# Provide the directory containing the segment files
segment_directory = "/Users/jitendersingh/Documents/audiotest/photo2/"
# Provide the output file path for the complete audio
# (this path lies inside segment_directory itself)
output_file = "/Users/jitendersingh/Documents/audiotest/photo2/zzzzzz.wav"
# Set the FFPROBE_PATH environment variable
# NOTE(review): presumably meant for the audio backend - confirm that it
# actually reads this variable; it is set only after the split stage has run.
os.environ["FFPROBE_PATH"] = "/opt/homebrew/bin/ffprobe"
# Stage 2: merge the segments and sort the files into the included/excluded folders.
concatenate_audio_segments(segment_directory, output_file)
# -------------------------- NOTE: the code below, which filters on the middle words, is only half finished and still has to be completed --------------------------
def filter_files(file_list):
    """Drop files whose middle words resemble those of the following file.

    The list is walked from the end towards the start. The last file is
    always kept; every other file is kept only when its middle words are not
    similar to those of the file that comes right after it.

    Args:
        file_list: File names in playback order.

    Returns:
        The kept file names, in their original order.
    """
    # NOTE(review): the source indentation was lost; this assumes the
    # reference words are updated on every iteration (compare with the
    # immediate neighbour), not only when a file is kept - confirm.
    kept = []
    neighbour_words = None
    for name in reversed(file_list):
        words = get_middle_words(name)
        is_duplicate = neighbour_words is not None and is_similar_sequence(
            neighbour_words, words
        )
        if not is_duplicate:
            kept.append(name)
        neighbour_words = words
    kept.reverse()
    return kept
def get_middle_words(file_name):
    """Return the middle words of a file name as one space-joined string.

    The name is split on whitespace; the first two tokens and the last token
    are discarded and the rest is joined with single spaces. Names with three
    tokens or fewer yield an empty string.
    """
    tokens = file_name.split()
    return " ".join(tokens[2:-1])
def is_similar_sequence(seq1, seq2, threshold=0.7):
    """Report whether two space-separated word sequences are near-duplicates.

    Args:
        seq1: First word sequence as a string, or ``None``.
        seq2: Second word sequence as a string, or ``None``.
        threshold: Minimum word-level ``SequenceMatcher`` ratio that counts
            as similar.

    Returns:
        ``True`` when the word-level match ratio is at least ``threshold``.
        ``False`` when either argument is ``None`` or contains no words.
    """
    if seq1 is None or seq2 is None:
        return False
    seq1_words = seq1.split()
    seq2_words = seq2.split()
    # Two empty word lists score a ratio of 1.0, which would mark every pair
    # of short file names as duplicates; no words means no evidence.
    if not seq1_words or not seq2_words:
        return False
    match_ratio = difflib.SequenceMatcher(None, seq1_words, seq2_words).ratio()
    return match_ratio >= threshold
def _wav_number(path):
    """Return the first ``<digits>.`` number in the file name of *path*.

    Only the base name is inspected so digits in a directory name cannot
    influence the order; names without a number sort last.
    """
    found = re.search(r"(\d+)\.", os.path.basename(path))
    return int(found.group(1)) if found else float("inf")


# Fetch all WAV files from the included folder
wav_files = glob.glob(included_folder1 + "/*.wav")
# Sort the WAV files numerically based on the numbers in the filenames
sorted_files = sorted(wav_files, key=_wav_number)
# Keep only the file names; the folders are re-attached when moving.
files = [os.path.basename(wav_file) for wav_file in sorted_files]
filtered_files = filter_files(files)

# shutil.move fails when the destination folder is missing.
os.makedirs(included_folder2, exist_ok=True)
complete_audio = AudioSegment.empty()
for file_name in filtered_files:
    source_path = f"{included_folder1}/{file_name}"
    destination_path = f"{included_folder2}/{file_name}"
    # Move the file from included_folder1 to included_folder2
    shutil.move(source_path, destination_path)
    print(f"Moved file: {file_name}")
    # Load each segment audio
    segment_audio = AudioSegment.from_file(destination_path, format="wav")
    # Concatenate the segment to the complete audio
    complete_audio += segment_audio

# Export the complete audio to a single WAV file
output_file = "/Users/jitendersingh/Documents/audiotest/finalfile.wav"
if len(complete_audio) > 0:
    # export() hands back the open output file; close it explicitly.
    complete_audio.export(output_file, format="wav").close()
    print("Concatenation completed. Output file saved as", output_file)
else:
    # Nothing survived the filtering, so there is no audio to write.
    print("No segments found for concatenation.")