import re
import subprocess
import sys
import time
from collections import Counter

import numpy as np
import whisper

from config import RTSP_URL

# Filler words that are counted in the transcript.
FILLER_WORDS = ["äh", "ähhm", "ähm", "mhm", "halt", "quasi", "sozusagen", "eigentlich"]

# Audio format requested from FFmpeg: 16 kHz, mono, signed 16-bit little-endian.
SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2
# 3200 bytes = 1600 samples = 0.1 s of audio per read at the format above.
CHUNK_BYTES = 3200

# Pause-detection settings.
THRESHOLD = 350          # RMS level (int16 scale) below which a chunk counts as silent
SILENCE_DURATION = 2.5   # seconds of continuous silence that end the recording
MIN_AUDIO_LENGTH = 15    # minimum number of buffered chunks before stopping


def analyze_text(text):
    """Count the filler words contained in a transcript.

    The text is lower-cased and every character that is neither a word
    character nor whitespace is removed before it is split on whitespace.

    Args:
        text: The recognised text.

    Returns:
        A tuple ``(total, found)`` where ``found`` maps each filler word that
        occurs at least once to its number of occurrences (in the order of
        ``FILLER_WORDS``) and ``total`` is the sum of those counts.
    """
    # Strip punctuation so that "ähm," and "ähm" are counted as the same word.
    words = re.sub(r'[^\w\s]', '', text.lower()).split()
    occurrences = Counter(words)
    found_fillers = {w: occurrences[w] for w in FILLER_WORDS if occurrences[w]}
    return sum(found_fillers.values()), found_fillers


def _rms_amplitude(samples):
    """Return the RMS level of an int16 sample array."""
    # Squaring int16 values directly wraps around (e.g. 200**2 does not fit),
    # which yields a wrong or even NaN level; do the arithmetic in float64.
    return float(np.sqrt(np.mean(samples.astype(np.float64) ** 2)))


def _stop_ffmpeg(process):
    """Stop the FFmpeg child process, reap it and close its stdout pipe."""
    if process.poll() is None:
        process.terminate()
        try:
            process.wait(timeout=2)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
    if process.stdout is not None and not process.stdout.closed:
        process.stdout.close()


def run_adaptive_whisper():
    """Record speech from the RTSP stream until a pause, then analyse it.

    Loads the Whisper "base" model, starts FFmpeg to decode ``RTSP_URL`` into
    raw PCM on stdout, and buffers audio chunks until the level has stayed
    below ``THRESHOLD`` for more than ``SILENCE_DURATION`` seconds (or the
    stream ends). The buffered audio is transcribed as German and the filler
    word statistics are printed.

    Returns:
        None. All results and errors are reported on stdout.
    """
    print("--- M2 Live-Coach: Analyse läuft ---")

    print("Lade Modell (base)...")
    model = whisper.load_model("base")

    # RTSP over TCP for a more stable connection; raw PCM is written to stdout.
    command = [
        'ffmpeg',
        '-rtsp_transport', 'tcp',
        '-i', RTSP_URL,
        '-ar', str(SAMPLE_RATE),
        '-ac', '1',
        '-f', 's16le',
        '-',
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

    audio_buffer = []
    silence_start = None

    print(f"\nVerbindung zu: {RTSP_URL}")
    print("[MISTY HÖRT ZU] Bitte sprich jetzt (2.5s Pause zum Beenden)...")

    try:
        while True:
            raw_chunk = process.stdout.read(CHUNK_BYTES)
            if not raw_chunk:
                break

            # A final short read may end in half a sample; np.frombuffer would
            # raise on a byte count that is not a multiple of the sample width.
            usable = len(raw_chunk) - (len(raw_chunk) % BYTES_PER_SAMPLE)
            if usable == 0:
                continue
            chunk_np = np.frombuffer(raw_chunk[:usable], dtype=np.int16)
            audio_buffer.append(chunk_np)

            if _rms_amplitude(chunk_np) < THRESHOLD:
                # Monotonic clock: immune to wall-clock adjustments.
                now = time.monotonic()
                if silence_start is None:
                    silence_start = now
                elif now - silence_start > SILENCE_DURATION:
                    # NOTE(review): the buffer also contains silent chunks, so
                    # on a real-time stream this minimum is presumably already
                    # met after 2.5 s of leading silence — confirm that ending
                    # the recording without any speech is intended.
                    if len(audio_buffer) > MIN_AUDIO_LENGTH:
                        print("\n[Pause erkannt - Analyse startet]")
                        break
            else:
                # Speech detected: show progress and restart the silence timer.
                sys.stdout.write(".")
                sys.stdout.flush()
                silence_start = None

        # Stop streaming before the (slow) transcription starts.
        _stop_ffmpeg(process)

        if not audio_buffer:
            print("\n❌ Fehler: Keine Audiodaten empfangen.")
            return

        print("Verarbeite Audio...")
        # Whisper expects float32 samples normalised to [-1.0, 1.0).
        full_audio = np.concatenate(audio_buffer).astype(np.float32) / 32768.0

        # The initial prompt nudges the model to keep filler words in the text.
        result = model.transcribe(full_audio, language="de", initial_prompt="Äh, ähm, mhm.")
        text = result['text'].strip()

        count, details = analyze_text(text)

        # --- Output ---
        print("\n" + "═"*45)
        print(f"ERKANNT: {text}")
        print("─"*45)
        print(f"ANALYSE: {count} Füllwörter gefunden.")
        if count > 0:
            for w, n in details.items():
                print(f" -> '{w}': {n}x")
        print("═"*45 + "\n")

    except Exception as e:
        # Top-level boundary of the script: report the error instead of crashing.
        print(f"\nFehler: {e}")
    finally:
        # Also reached on KeyboardInterrupt; never leave FFmpeg running.
        _stop_ffmpeg(process)


if __name__ == "__main__":
    run_adaptive_whisper()