python: Text-to-Speech and Speech-to-Text

发布时间 2023-08-05 11:20:43 作者: ®Geovin Du Dream Park™

 

"""
python.exe -m pip install --upgrade pip
pip install pyttsx3
pip install comtypes
pip install Pillow
pip install requests
pip install pywin32

pip install PocketSphinx
pip install SpeechRecognition

python: 3.11

"""


import sys
import os
import pyttsx3 as pyttsx
from win32com.client import Dispatch
from comtypes.client import CreateObject
#from comtypes.tools import SpeechLib
import speech_recognition as sr
# NOTE(review): these two SAPI COM objects are created at import time, before
# the `comtypes.gen` import below. Presumably instantiating them is what makes
# comtypes generate the `comtypes.gen.SpeechLib` wrapper module, so the order
# of the next three lines should not be changed without confirming that.
# Importing this module therefore has COM side effects.
engine = CreateObject("SAPI.SpVoice")
stream = CreateObject("SAPI.SpFileStream")
from comtypes.gen import SpeechLib


class ttsHepler(object):
    """
    Text-to-speech and speech-to-text helper methods.

    The class keeps no state of its own: every method creates the engine or
    recognizer object it needs on each call.
    """

    def ttsstrx(self, word: str) -> None:
        """
        Speak text aloud through a pyttsx3 engine.

        :param word: the text to speak
        :return: None
        """
        engine = pyttsx.init()
        engine.say(word)
        engine.runAndWait()

    def ttswin(self, word: str) -> None:
        """
        Speak text aloud through a ``SAPI.SpVoice`` object created with
        win32com ``Dispatch``.

        :param word: the text to speak
        :return: None
        """
        speaker = Dispatch("SAPI.SpVoice")
        speaker.Speak(word)

    def ttslib(self, word: str, dy=False,
               infile: str = 'fileText.txt',
               outfile: str = 'demo_audio.wav') -> None:
        """
        Read the text stored in ``infile`` and render it as speech into the
        audio file ``outfile``.

        :param word: not used. The spoken text always comes from ``infile``;
            the parameter is kept so existing callers keep working.
        :param dy: forwarded to ``CreateObject`` as its ``dynamic`` argument
        :param infile: path of the UTF-8 text file to read
        :param outfile: path of the audio file to create
        :return: None
        :raises OSError: if ``infile`` cannot be opened or read
        """
        # Read the input first so an unreadable file fails before any COM
        # object or output stream has been created.
        with open(infile, 'r', encoding='utf-8') as text_file:
            the_text = text_file.read()

        engine = CreateObject("SAPI.SpVoice", dynamic=dy)
        stream = CreateObject("SAPI.SpFileStream", dynamic=dy)
        # Imported only after the CreateObject calls, as the original code did.
        from comtypes.gen import SpeechLib

        stream.Open(outfile, SpeechLib.SSFMCreateForWrite)
        try:
            engine.AudioOutputStream = stream
            engine.speak(the_text)
        finally:
            # Close the file stream even when speaking fails.
            stream.close()

    def sttspeech(self, audio_file: str = 'demo_audio.wav',
                  language: str = 'zh-CN'):
        """
        Transcribe an audio file with ``recognize_sphinx`` and print the text.

        Recognition defaults to English; recognising Mandarin needs the
        Mandarin model files, which can be downloaded from
        https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/Mandarin/

        After unpacking, rename:
          * folder ``cmusphinx-zh-cn-5.2`` to ``zh-CN``
          * folder ``zh_cn.cd_cont_5000`` to ``acoustic-model``
          * ``zh_cn.dic`` to ``pronounciation-dictionary.dict``
          * ``zh_cn.lm.bin`` to ``language-model.lm.bin``

        Then move the ``zh-CN`` folder into
        ``python3/Lib/site-packages/speech_recognition/pocketsphinx-data``.

        :param audio_file: path of the audio file to transcribe
        :param language: language code passed to ``recognize_sphinx``
        :return: the recognised text, or None when recognition (or printing
            the result) raised an exception
        """
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        try:
            text = recognizer.recognize_sphinx(audio, language=language)
            # The print stays inside the try block: the original treated a
            # failure to print the result as best-effort too.
            print("文本内容:", text)
            return text
        except Exception as e:
            # Deliberate best-effort: report the problem instead of raising.
            print(e)
            return None