"""Record speech from an RTSP stream, transcribe it and count filler words.

Audio is pulled through an ffmpeg subprocess as raw 16 kHz mono s16le PCM,
buffered until a pause is detected, transcribed with Whisper (German) and
then scanned for the words listed in FILLER_WORDS.
"""

import re
import subprocess
import sys
import time
from collections import Counter
from typing import Dict, Tuple

import numpy as np
import whisper

from config import RTSP_URL

FILLER_WORDS = ["äh", "ähhm", "ähm", "mhm", "halt", "quasi", "sozusagen", "eigentlich"]

# ffmpeg is asked for 16 kHz mono s16le below, so one 3200-byte read is
# 1600 samples, i.e. 0.1 s of audio.
CHUNK_BYTES = 3200
BYTES_PER_SAMPLE = 2

RMS_THRESHOLD = 300        # chunks with an RMS below this count as silence
SILENCE_DURATION = 2.5     # seconds of continuous silence that end the recording
MIN_CHUNKS = 20            # never stop before more than this many chunks are buffered

CONNECT_ATTEMPTS = 100     # together with the delay below: wait up to ~10 s
CONNECT_RETRY_DELAY = 0.1  # seconds between attempts to read the first chunk


def analyze_text(text: str) -> Tuple[int, Dict[str, int]]:
    """Count occurrences of the configured filler words in *text*.

    The text is lower-cased and stripped of punctuation before it is split on
    whitespace, so "Ähm," is counted as "ähm".

    Returns:
        A ``(total, per_word)`` tuple. ``per_word`` maps every filler word
        that occurs at least once to its count, in FILLER_WORDS order;
        ``total`` is the sum of those counts.
    """
    words = re.sub(r'[^\w\s]', '', text.lower()).split()
    counts = Counter(words)  # one pass instead of one list scan per filler word
    found_fillers = {w: counts[w] for w in FILLER_WORDS if counts[w]}
    return sum(found_fillers.values()), found_fillers


def _pcm_to_samples(raw: bytes) -> np.ndarray:
    """Decode raw s16le bytes to int16 samples, dropping a trailing half sample."""
    # A short read at end-of-stream may have an odd length, which
    # np.frombuffer rejects for a 2-byte dtype.
    usable = len(raw) - len(raw) % BYTES_PER_SAMPLE
    return np.frombuffer(raw[:usable], dtype=np.int16)


def _rms(samples: np.ndarray) -> float:
    """Return the root-mean-square amplitude of *samples* (0.0 if empty)."""
    if samples.size == 0:
        return 0.0
    # Square in float64: squaring int16 in place wraps around for any sample
    # above ~181 and makes the result meaningless.
    return float(np.sqrt(np.mean(np.square(samples.astype(np.float64)))))


def run_adaptive_whisper():
    """Record one utterance from RTSP_URL, transcribe it and print the result.

    Recording stops when the stream ends or when the signal has stayed below
    RMS_THRESHOLD for longer than SILENCE_DURATION seconds and more than
    MIN_CHUNKS chunks have been buffered. The transcript and the total filler
    word count are printed to stdout. Errors during capture or transcription
    are printed, not raised; the ffmpeg process is always cleaned up.
    """
    print("--- M2 Live-Coach: Analyse läuft ---")
    print("Lade KI-Modell...")
    model = whisper.load_model("base")

    # Force RTSP over TCP and let ffmpeg write raw 16 kHz mono s16le to stdout.
    command = [
        'ffmpeg', '-rtsp_transport', 'tcp', '-i', RTSP_URL,
        '-ar', '16000', '-ac', '1', '-f', 's16le', '-'
    ]
    print(f"Verbinde zu: {RTSP_URL}")
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

    audio_buffer = []
    silence_start = None

    print("[WARTE AUF STREAM...]")
    try:
        # Wait for the first audio data to arrive.
        for _ in range(CONNECT_ATTEMPTS):
            first_samples = _pcm_to_samples(process.stdout.read(CHUNK_BYTES))
            if first_samples.size:
                print("[MISTY HÖRT ZU] - Daten fließen!")
                audio_buffer.append(first_samples)
                break
            if process.poll() is not None:
                # ffmpeg has exited; further reads can never return data.
                break
            time.sleep(CONNECT_RETRY_DELAY)

        if not audio_buffer:
            print("❌ Fehler: Misty sendet keine Daten auf Port 1936.")
            return

        while True:
            raw_chunk = process.stdout.read(CHUNK_BYTES)
            if not raw_chunk:
                break  # stream ended
            samples = _pcm_to_samples(raw_chunk)
            if samples.size == 0:
                continue
            audio_buffer.append(samples)

            if _rms(samples) < RMS_THRESHOLD:
                now = time.monotonic()
                if silence_start is None:
                    silence_start = now
                elif now - silence_start > SILENCE_DURATION and len(audio_buffer) > MIN_CHUNKS:
                    break
            else:
                # Progress indicator for every non-silent chunk.
                sys.stdout.write(".")
                sys.stdout.flush()
                silence_start = None

        process.terminate()
        # Whisper expects float32 samples scaled to [-1, 1).
        full_audio = np.concatenate(audio_buffer).astype(np.float32) / 32768.0
        result = model.transcribe(full_audio, language="de", initial_prompt="Äh, ähm, mhm.")
        text = result['text'].strip()
        count, _ = analyze_text(text)
        print("\n" + "═"*45)
        print(f"TEXT: {text}")
        print(f"FÜLLWÖRTER: {count}")
        print("═"*45)
    except Exception as e:
        # Top-level boundary of the script: report the error instead of crashing.
        print(f"\nFehler: {e}")
    finally:
        # Make sure ffmpeg is gone, its pipe is closed and the child is reaped.
        process.kill()
        process.stdout.close()
        process.wait()


if __name__ == "__main__":
    run_adaptive_whisper()