def __init__(self, start_time: int, end_time: int) -> None:
    """Store the time span covered by this model.

    Args:
        start_time: Start of the span, in milliseconds.
        end_time: End of the span, in milliseconds.
    """
    # Both bounds are kept as millisecond offsets.
    self._start_time, self._end_time = start_time, end_time
@staticmethod
def _srt_time_to_ms(srt_time_string: str) -> int:
    """Convert an SRT timestamp (``HH:MM:SS,mmm``) to total milliseconds.

    Args:
        srt_time_string: Timestamp in SRT notation, e.g. ``"01:02:03,004"``.

    Returns:
        The number of milliseconds the timestamp represents, counting the
        hours, minutes, seconds and fractional part.

    Raises:
        ValueError: If the string does not match ``%H:%M:%S,%f`` (this
            includes hour values outside 0-23, which ``strptime`` rejects).
    """
    # Parse SRT time string into a datetime object
    time_format = "%H:%M:%S,%f"
    dt = datetime.strptime(srt_time_string, time_format)

    # Hours and minutes must contribute too; using only the seconds field
    # would wrap every timestamp back into the first minute.
    whole_seconds = dt.hour * 3600 + dt.minute * 60 + dt.second
    return whole_seconds * 1000 + dt.microsecond // 1000
def similarity_score(self, other: "VideoModel") -> float:
    """Score how similar two image descriptions are, as a percentage.

    Each description is split on whitespace; every word is lowercased and
    has its trailing ``s`` characters stripped so that plural and singular
    forms (and different casings) compare equal. The score is the number of
    shared tokens divided by the size of the larger token set, times 100.

    Args:
        other: The model whose ``image_description`` is compared with this
            one's.

    Returns:
        A value between 0.0 and 100.0. Returns 0.0 when neither description
        contains any words, since there is nothing in common to measure.
    """

    def _tokenize(description: str) -> set:
        # Case-insensitive, plural-insensitive bag of words.
        return {word.lower().rstrip("s") for word in description.split()}

    self_tokenized = _tokenize(self.image_description)
    other_tokenized = _tokenize(other.image_description)

    largest = max(len(self_tokenized), len(other_tokenized))
    if largest == 0:
        # Both descriptions are empty: avoid dividing by zero.
        return 0.0

    # Find common words
    common_words = self_tokenized & other_tokenized

    # Calculate similarity score
    return len(common_words) / largest * 100
def to_srt_entry(self, index: int) -> str:
    """Render this caption as a single SRT subtitle entry.

    The entry consists of three lines: the cue index, the timing line
    ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` built from ``self._start_time`` and
    ``self._end_time`` (milliseconds), and the text in
    ``self._closed_caption``.

    Args:
        index: Sequence number written on the first line of the entry.

    Returns:
        The entry as a newline-separated string, without a trailing blank
        line.
    """

    def _ms_to_srt_time(ms: int) -> str:
        """Converts milliseconds to SRT time format 'HH:MM:SS,mmm'."""
        seconds_total, milliseconds = divmod(int(ms), 1000)
        minutes_total, seconds = divmod(seconds_total, 60)
        hours, minutes = divmod(minutes_total, 60)
        return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"

    start = _ms_to_srt_time(self._start_time)
    end = _ms_to_srt_time(self._end_time)

    # SRT needs the index, the timing line and the text on separate lines;
    # joining explicit parts keeps source indentation out of the output.
    return "\n".join([f"{index}", f"{start} --> {end}", f"{self._closed_caption}"])