［feats/llm］语音大模型背景下的llm集成




现状
- deepspeed https://github.com/wenet-e2e/wenet/pull/2055
- 中文paraformer 全语种whisper https://github.com/wenet-e2e/wenet/pull/2139 @xingchensong https://github.com/wenet-e2e/wenet/pull/2141
- 代码简洁 容易diy
- llm decoder only 下代码几乎一致
- 有 lora的pr, https://github.com/wenet-e2e/wenet/pull/2049

语音大模型是一方面（https://github.com/wenet-e2e/wenet/issues/2097 ）， 另外一个路子是和llm的结合， 后者目前paper 日益增多， 但缺少集语音、llm于一体的、简单且易于diy/研究的repo

在此有个想法，wenet集成llm 比如llama







# 宗旨

1. 数据、模型、代码全部都会开源开放，欢迎大家贡献，有数据的出数据，有意见的出意见，有机器的出机器，大家共创。
2. 且做且分析

# 目标

1. base 当前/未来语音大模型 https://github.com/wenet-e2e/wenet/issues/2097/ + xxx llm， 构造Audio+LLM的语音语言大模型，unify all speech task + speech multi-round chat ability
2. 积累语音所有任务的数据以及构造audio instruct/prompt数据

# Action

## 数据
- [ ] https://github.com/wenet-e2e/wenet/issues/2097#issue-1971787771
- [ ] 构造speech instruct/prompt

## 训练
- [x] FSDP https://github.com/wenet-e2e/wenet/pull/2412
- [ ] convert xxxllm to wenet format
- [ ] 解决llm base下语音热词/itn等问题
- [ ] generate/chat
- [x] tokenizer 重构：https://github.com/wenet-e2e/wenet/issues/2142#issuecomment-1813736407 @Mddct
- [x] 新IO ， 支持各种灵活的输入 https://github.com/wenet-e2e/wenet/pull/2316
- [x] flash att: https://github.com/wenet-e2e/wenet/pull/2351
- [ ] adapter/lora
- [ ] 融合方案：扩词典+embedding
- [ ] multi task https://github.com/QwenLM/Qwen-Audio (p0)
- [x] https://github.com/wenet-e2e/wenet/issues/2097#issue-1971787771
- [x] wenet 增加 paraformer 支持（目前最好的中文模型，可以用来做中文speech基座）https://github.com/wenet-e2e/wenet/pull/2067
- [ ] generate tokens and can be used by speech generation (translation/tts)

## 部署
- wenet.cpp (speech.cpp+xxxllm.cpp)
- [ ] int4量化，降低带宽需求

## 目前可行方案，
- https://github.com/salesforce/BLIP
- https://github.com/QwenLM/Qwen-Audio
- https://google-research.github.io/seanet/audiopalm/examples/
- etc

特点： 主要为微调，微调对数据量要求不高，方法类似
需要基座： Llama + whisper + tune

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

［feats/llm］语音大模型背景下的llm集成 #2142

宗旨

目标

Action

数据

训练

部署

目前可行方案，

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

［feats/llm］语音大模型背景下的llm集成 #2142

Description

宗旨

目标

Action

数据

训练

部署

目前可行方案，

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions