# Misty-Rhetorik-Coach/old/whisper_terminal_check.py
# 2026-05-06 12:46:05 +00:00
#
# 90 lines
# 2.8 KiB
# Python

import whisper
import numpy as np
import subprocess
import time
import sys
import re
from config import RTSP_URL
# German filler words that are counted in a transcript.
FILLER_WORDS = ["äh", "ähhm", "ähm", "mhm", "halt", "quasi", "sozusagen", "eigentlich"]


def analyze_text(text):
    """Count the filler words contained in a transcript.

    The text is lower-cased, every character that is neither a word character
    nor whitespace is replaced by a space, and the result is split on
    whitespace. Only whole words are compared against ``FILLER_WORDS``, so
    "Haltestelle" does not count as "halt".

    Args:
        text: Transcribed text to inspect.

    Returns:
        A tuple ``(total, per_word)``. ``per_word`` maps every entry of
        ``FILLER_WORDS`` that occurs at least once to its number of
        occurrences (in ``FILLER_WORDS`` order); ``total`` is the sum of
        those counts.
    """
    # Substitute a space rather than the empty string: deleting punctuation
    # would fuse neighbouring words ("äh...ähm" -> "ähähm") and hide fillers.
    text_clean = re.sub(r"[^\w\s]", " ", text.lower())
    words = text_clean.split()
    found_fillers = {w: words.count(w) for w in FILLER_WORDS if w in words}
    return sum(found_fillers.values()), found_fillers
def _pcm_from_bytes(raw_chunk):
    """Decode raw s16le bytes into a 16-bit sample array, dropping a dangling odd byte."""
    # The last read before EOF may return an odd number of bytes, which
    # np.frombuffer rejects for a 2-byte dtype.
    usable = len(raw_chunk) - (len(raw_chunk) % 2)
    # "<i2" = little-endian int16, matching ffmpeg's "s16le" output format.
    return np.frombuffer(raw_chunk[:usable], dtype=np.dtype("<i2"))


def _rms_amplitude(samples):
    """Return the RMS level of a 16-bit sample array (0.0 for an empty array)."""
    if samples.size == 0:
        return 0.0
    # Square in float64: squaring int16 values stays in int16 and wraps
    # around for |x| > 181, which corrupts the mean (it can even go negative).
    return float(np.sqrt(np.mean(np.square(samples.astype(np.float64)))))


def run_adaptive_whisper():
    """Record speech from the RTSP stream until a pause, then transcribe it.

    Loads the Whisper "base" model, starts ffmpeg to read ``RTSP_URL`` over
    TCP and emit 16 kHz mono s16le PCM on stdout, and collects audio chunks.
    Recording stops when the stream ends or when the RMS level has stayed
    below the threshold for longer than the silence duration (and more than
    20 chunks have been collected). The collected audio is transcribed as
    German, the filler words are counted with ``analyze_text`` and the result
    is printed.

    Returns:
        None. All results and errors are reported on stdout; exceptions
        raised inside the recording/transcription phase are caught and
        printed.
    """
    print("--- M2 Live-Coach: Analyse läuft ---")
    print("Lade KI-Modell...")
    model = whisper.load_model("base")

    # Force RTSP over TCP; output raw 16 kHz mono signed 16-bit PCM to stdout.
    command = [
        'ffmpeg',
        '-rtsp_transport', 'tcp',
        '-i', RTSP_URL,
        '-ar', '16000', '-ac', '1', '-f', 's16le', '-'
    ]
    print(f"Verbinde zu: {RTSP_URL}")
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

    audio_buffer = []
    silence_start = None
    THRESHOLD = 300  # RMS level below which a chunk counts as silence
    SILENCE_DURATION = 2.5  # seconds of continuous silence that end the recording
    CHUNK_BYTES = 3200  # 1600 16-bit samples = 0.1 s at 16 kHz mono

    print("[WARTE AUF STREAM...]")
    try:
        # Up to 100 attempts, 0.1 s apart, to obtain the first chunk.
        for _ in range(100):
            raw_chunk = process.stdout.read(CHUNK_BYTES)
            if raw_chunk:
                print("[MISTY HÖRT ZU] - Daten fließen!")
                audio_buffer.append(_pcm_from_bytes(raw_chunk))
                break
            if process.poll() is not None:
                # ffmpeg has already exited; no data will ever arrive.
                break
            time.sleep(0.1)

        if not audio_buffer:
            print("❌ Fehler: Misty sendet keine Daten auf Port 1936.")
            return

        while True:
            raw_chunk = process.stdout.read(CHUNK_BYTES)
            if not raw_chunk:
                break  # end of stream

            chunk_np = _pcm_from_bytes(raw_chunk)
            audio_buffer.append(chunk_np)

            amplitude = _rms_amplitude(chunk_np)
            if amplitude < THRESHOLD:
                # monotonic() is immune to wall-clock adjustments.
                if silence_start is None:
                    silence_start = time.monotonic()
                elif time.monotonic() - silence_start > SILENCE_DURATION:
                    if len(audio_buffer) > 20:
                        break
            else:
                # Loud chunk: show activity and restart the silence timer.
                sys.stdout.write(".")
                sys.stdout.flush()
                silence_start = None

        process.terminate()

        # Scale the 16-bit samples to float32 in [-1, 1) for Whisper.
        full_audio = np.concatenate(audio_buffer).astype(np.float32) / 32768.0
        # The prompt nudges the model to keep filler sounds in the transcript.
        result = model.transcribe(full_audio, language="de", initial_prompt="Äh, ähm, mhm.")
        text = result['text'].strip()
        count, _details = analyze_text(text)

        # NOTE(review): the original separator was "" * 45 (an empty string);
        # "=" is assumed as the intended rule character.
        print("\n" + "=" * 45)
        print(f"TEXT: {text}")
        print(f"FÜLLWÖRTER: {count}")
        print("=" * 45)
    except Exception as e:
        print(f"\nFehler: {e}")
    finally:
        # Make sure ffmpeg is gone, reap it and release the pipe.
        if process.poll() is None:
            process.kill()
        process.wait()
        if process.stdout is not None:
            process.stdout.close()
# Start the live analysis only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_adaptive_whisper()